{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "DOAPVFAaE3Kq"
      },
      "source": [
        "# Agentic RAG: Factory Safety Assistant"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mongodb-developer/GenAI-Showcase/blob/main/notebooks/agents/agentic_rag_factory_safety_assistant_with_langgraph_langchain_mongodb.ipynb)\n",
        "\n",
        "[![AI Learning Hub For Developers](https://img.shields.io/badge/AI%20Learning%20Hub%20For%20Developers-Click%20Here-blue)](https://www.mongodb.com/resources/use-cases/artificial-intelligence?utm_campaign=ai_learning_hub&utm_source=github&utm_medium=referral)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "eGYCoT_mFDQU"
      },
      "outputs": [],
      "source": [
        "%pip install --quiet datasets pandas pymongo langchain_openai"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "G23CzSyYFMrN"
      },
      "outputs": [],
      "source": [
        "import getpass\n",
        "import os\n",
        "\n",
        "\n",
        "def set_env_securely(var_name, prompt):\n",
        "    \"\"\"Prompt for a secret without echoing it and store it in ``os.environ``.\n",
        "\n",
        "    Args:\n",
        "        var_name: Name of the environment variable to set.\n",
        "        prompt: Text shown to the user while waiting for input.\n",
        "\n",
        "    Raises:\n",
        "        ValueError: If the entered value is empty or only whitespace.\n",
        "    \"\"\"\n",
        "    # Strip surrounding whitespace so a stray space or newline from a paste\n",
        "    # does not become part of the stored value.\n",
        "    value = getpass.getpass(prompt).strip()\n",
        "    if not value:\n",
        "        # Fail here instead of exporting an empty variable that only surfaces\n",
        "        # later as an authentication error.\n",
        "        raise ValueError(f\"No value provided for {var_name}\")\n",
        "    os.environ[var_name] = value"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "j829-BYvFR_s",
        "outputId": "26e8c570-aea1-4e6a-feac-4d865cb5fcb8"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Enter your OpenAI API key: ··········\n"
          ]
        }
      ],
      "source": [
        "# Non-sensitive environment variables\n",
        "OPEN_AI_EMBEDDING_MODEL = \"text-embedding-3-small\"\n",
        "OPEN_AI_EMBEDDING_MODEL_DIMENSION = 256\n",
        "\n",
        "# Uncomment below to utilize LangSmith\n",
        "# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
        "# os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
        "# os.environ[\"LANGCHAIN_PROJECT\"] = \"factory_safety_assistant\"\n",
        "\n",
        "# Sensitive Environment Variables\n",
        "set_env_securely(\"OPENAI_API_KEY\", \"Enter your OpenAI API key: \")\n",
        "# Uncomment below to utilize LangSmith\n",
        "# set_env_securely(\"LANGCHAIN_API_KEY\", \"Enter your LangChain API key: \")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "id": "RqVZ_dCEFXqy"
      },
      "outputs": [],
      "source": [
        "# Step 1 (optional): Data Loading from local JSON files\n",
        "# The next cell assigns the same two DataFrames from Hugging Face, so this\n",
        "# cell only does work when local copies of both files are present.\n",
        "from pathlib import Path\n",
        "\n",
        "import pandas as pd\n",
        "\n",
        "accidents_path = Path(\"accidents_incidents.json\")\n",
        "safety_path = Path(\"safety_procedures.json\")\n",
        "\n",
        "if accidents_path.exists() and safety_path.exists():\n",
        "    # Load the accidents dataset\n",
        "    accidents_df = pd.read_json(accidents_path)\n",
        "\n",
        "    # Load the safety procedures dataset\n",
        "    safety_df = pd.read_json(safety_path)\n",
        "else:\n",
        "    # Do not raise: a missing local copy should not stop Restart & Run All.\n",
        "    print(\"Local dataset files not found; skipping local load.\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 15,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "dmFUW83p8lLl",
        "outputId": "89c842b6-5ed0-4286-a8aa-0c9783160957"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Repo card metadata block was not found. Setting CardData to empty.\n",
            "WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.\n"
          ]
        }
      ],
      "source": [
        "# Step 1: Data Loading\n",
        "import pandas as pd\n",
        "from datasets import load_dataset\n",
        "\n",
        "# Make sure you have a Hugging Face token (HF_TOKEN) in your development environment before running the code below\n",
        "# How to get a token: https://huggingface.co/docs/hub/en/security-tokens\n",
        "\n",
        "safety_procedure_ds = load_dataset(\"MongoDB/safety_procedure_dataset\", split=\"train\")\n",
        "safety_df = pd.DataFrame(safety_procedure_ds)\n",
        "\n",
        "accident_reports_ds = load_dataset(\"MongoDB/accident_reports\", split=\"train\")\n",
        "accidents_df = pd.DataFrame(accident_reports_ds)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 16,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "8GbmqE1MHV_m",
        "outputId": "a54c095b-5b5c-44aa-c2c0-18903194345c"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "<class 'pandas.core.frame.DataFrame'>\n",
            "RangeIndex: 100 entries, 0 to 99\n",
            "Data columns (total 9 columns):\n",
            " #   Column             Non-Null Count  Dtype         \n",
            "---  ------             --------------  -----         \n",
            " 0   incidentId         100 non-null    object        \n",
            " 1   dateTime           100 non-null    datetime64[ns]\n",
            " 2   location           100 non-null    object        \n",
            " 3   type               100 non-null    object        \n",
            " 4   description        100 non-null    object        \n",
            " 5   severityLevel      100 non-null    object        \n",
            " 6   relatedProcedures  100 non-null    object        \n",
            " 7   immediateActions   100 non-null    object        \n",
            " 8   rootCauses         100 non-null    object        \n",
            "dtypes: datetime64[ns](1), object(8)\n",
            "memory usage: 7.2+ KB\n"
          ]
        }
      ],
      "source": [
        "accidents_df.info()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 13,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 293
        },
        "id": "bZbABS0JGTXo",
        "outputId": "a70a993d-2f45-4a92-f366-27f45e26f334"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "summary": "{\n  \"name\": \"accidents_df\",\n  \"rows\": 100,\n  \"fields\": [\n    {\n      \"column\": \"incidentId\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 100,\n        \"samples\": [\n          \"INC-2024-084\",\n          \"INC-2024-054\",\n          \"INC-2024-071\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"dateTime\",\n      \"properties\": {\n        \"dtype\": \"date\",\n        \"min\": \"2023-08-28 09:01:41.296111\",\n        \"max\": \"2024-08-20 09:01:41.295713\",\n        \"num_unique_values\": 100,\n        \"samples\": [\n          \"2024-03-15 09:01:41.296977\",\n          \"2023-09-02 09:01:41.296372\",\n          \"2024-05-18 09:01:41.296740\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"location\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"type\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 5,\n        \"samples\": [\n          \"Fire Hazard\",\n          \"Height-Related Fall\",\n          \"Confined Space Incident\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"description\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 20,\n        \"samples\": [\n          \"Equipment Failure occurred at Factory B.\",\n          \"Height-Related Fall occurred at Factory B.\",\n          \"Chemical Spill occurred at Factory A.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"severityLevel\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        
\"num_unique_values\": 3,\n        \"samples\": [\n          \"low\",\n          \"high\",\n          \"medium\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"relatedProcedures\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"immediateActions\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          \"Shut down equipment and isolated area\",\n          \"Evacuated area and provided first aid\",\n          \"Contained spill and alerted hazardous material team\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"rootCauses\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
              "type": "dataframe",
              "variable_name": "accidents_df"
            },
            "text/html": [
              "\n",
              "  <div id=\"df-805ab318-7d4a-440e-985e-8c1e8568160c\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>incidentId</th>\n",
              "      <th>dateTime</th>\n",
              "      <th>location</th>\n",
              "      <th>type</th>\n",
              "      <th>description</th>\n",
              "      <th>severityLevel</th>\n",
              "      <th>relatedProcedures</th>\n",
              "      <th>immediateActions</th>\n",
              "      <th>rootCauses</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>INC-2024-001</td>\n",
              "      <td>2024-03-08 09:01:41.295149</td>\n",
              "      <td>{'region': 'East', 'site': 'Factory B'}</td>\n",
              "      <td>Equipment Failure</td>\n",
              "      <td>Equipment Failure occurred at Factory B.</td>\n",
              "      <td>low</td>\n",
              "      <td>[CHEM-012]</td>\n",
              "      <td>Contained spill and alerted hazardous material...</td>\n",
              "      <td>[{'category': 'procedural error', 'description...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>INC-2024-002</td>\n",
              "      <td>2024-02-05 09:01:41.295225</td>\n",
              "      <td>{'region': 'East', 'site': 'Warehouse C'}</td>\n",
              "      <td>Fire Hazard</td>\n",
              "      <td>Fire Hazard occurred at Warehouse C.</td>\n",
              "      <td>high</td>\n",
              "      <td>[CHEM-021, CONF-001]</td>\n",
              "      <td>Shut down equipment and isolated area</td>\n",
              "      <td>[{'category': 'procedural error', 'description...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>INC-2024-003</td>\n",
              "      <td>2024-04-26 09:01:41.295263</td>\n",
              "      <td>{'region': 'West', 'site': 'Plant D'}</td>\n",
              "      <td>Confined Space Incident</td>\n",
              "      <td>Confined Space Incident occurred at Plant D.</td>\n",
              "      <td>low</td>\n",
              "      <td>[CONF-031, CONF-028, CHEM-021]</td>\n",
              "      <td>Ventilated space and removed worker</td>\n",
              "      <td>[{'category': 'environmental factors', 'descri...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>INC-2024-004</td>\n",
              "      <td>2024-04-29 09:01:41.295283</td>\n",
              "      <td>{'region': 'North', 'site': 'Warehouse C'}</td>\n",
              "      <td>Equipment Failure</td>\n",
              "      <td>Equipment Failure occurred at Warehouse C.</td>\n",
              "      <td>high</td>\n",
              "      <td>[CONF-046, CONF-049]</td>\n",
              "      <td>Contained spill and alerted hazardous material...</td>\n",
              "      <td>[{'category': 'procedural error', 'description...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>INC-2024-005</td>\n",
              "      <td>2024-05-16 09:01:41.295300</td>\n",
              "      <td>{'region': 'West', 'site': 'Warehouse C'}</td>\n",
              "      <td>Fire Hazard</td>\n",
              "      <td>Fire Hazard occurred at Warehouse C.</td>\n",
              "      <td>high</td>\n",
              "      <td>[CONF-043, HEIGHTS-020, CONF-007]</td>\n",
              "      <td>Contained spill and alerted hazardous material...</td>\n",
              "      <td>[{'category': 'equipment failure', 'descriptio...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-805ab318-7d4a-440e-985e-8c1e8568160c')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-805ab318-7d4a-440e-985e-8c1e8568160c button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-805ab318-7d4a-440e-985e-8c1e8568160c');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-81cca637-dc99-4cb4-9be5-468c3b123fc1\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-81cca637-dc99-4cb4-9be5-468c3b123fc1')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-81cca637-dc99-4cb4-9be5-468c3b123fc1 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "     incidentId                   dateTime  \\\n",
              "0  INC-2024-001 2024-03-08 09:01:41.295149   \n",
              "1  INC-2024-002 2024-02-05 09:01:41.295225   \n",
              "2  INC-2024-003 2024-04-26 09:01:41.295263   \n",
              "3  INC-2024-004 2024-04-29 09:01:41.295283   \n",
              "4  INC-2024-005 2024-05-16 09:01:41.295300   \n",
              "\n",
              "                                     location                     type  \\\n",
              "0     {'region': 'East', 'site': 'Factory B'}        Equipment Failure   \n",
              "1   {'region': 'East', 'site': 'Warehouse C'}              Fire Hazard   \n",
              "2       {'region': 'West', 'site': 'Plant D'}  Confined Space Incident   \n",
              "3  {'region': 'North', 'site': 'Warehouse C'}        Equipment Failure   \n",
              "4   {'region': 'West', 'site': 'Warehouse C'}              Fire Hazard   \n",
              "\n",
              "                                    description severityLevel  \\\n",
              "0      Equipment Failure occurred at Factory B.           low   \n",
              "1          Fire Hazard occurred at Warehouse C.          high   \n",
              "2  Confined Space Incident occurred at Plant D.           low   \n",
              "3    Equipment Failure occurred at Warehouse C.          high   \n",
              "4          Fire Hazard occurred at Warehouse C.          high   \n",
              "\n",
              "                   relatedProcedures  \\\n",
              "0                         [CHEM-012]   \n",
              "1               [CHEM-021, CONF-001]   \n",
              "2     [CONF-031, CONF-028, CHEM-021]   \n",
              "3               [CONF-046, CONF-049]   \n",
              "4  [CONF-043, HEIGHTS-020, CONF-007]   \n",
              "\n",
              "                                    immediateActions  \\\n",
              "0  Contained spill and alerted hazardous material...   \n",
              "1              Shut down equipment and isolated area   \n",
              "2                Ventilated space and removed worker   \n",
              "3  Contained spill and alerted hazardous material...   \n",
              "4  Contained spill and alerted hazardous material...   \n",
              "\n",
              "                                          rootCauses  \n",
              "0  [{'category': 'procedural error', 'description...  \n",
              "1  [{'category': 'procedural error', 'description...  \n",
              "2  [{'category': 'environmental factors', 'descri...  \n",
              "3  [{'category': 'procedural error', 'description...  \n",
              "4  [{'category': 'equipment failure', 'descriptio...  "
            ]
          },
          "execution_count": 13,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "accidents_df.head()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 17,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "fOK7Gq0WHYPc",
        "outputId": "db1b25b1-b346-4e8e-b7da-947e6ae5d95b"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "<class 'pandas.core.frame.DataFrame'>\n",
            "RangeIndex: 50 entries, 0 to 49\n",
            "Data columns (total 6 columns):\n",
            " #   Column       Non-Null Count  Dtype \n",
            "---  ------       --------------  ----- \n",
            " 0   procedureId  50 non-null     object\n",
            " 1   title        50 non-null     object\n",
            " 2   description  50 non-null     object\n",
            " 3   category     50 non-null     object\n",
            " 4   steps        50 non-null     object\n",
            " 5   lastUpdated  50 non-null     object\n",
            "dtypes: object(6)\n",
            "memory usage: 2.5+ KB\n"
          ]
        }
      ],
      "source": [
        "safety_df.info()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 18,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "CCnDOrFoHM5k",
        "outputId": "20f107bf-9500-4e13-c996-8f4e1160d0d8"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "summary": "{\n  \"name\": \"safety_df\",\n  \"rows\": 50,\n  \"fields\": [\n    {\n      \"column\": \"procedureId\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 50,\n        \"samples\": [\n          \"HEIGHTS-014\",\n          \"CONF-040\",\n          \"CONF-031\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"title\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 15,\n        \"samples\": [\n          \"Confined Space Entry Procedure\",\n          \"Chemical Handling Procedure\",\n          \"Confined Space Communication Protocol\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"description\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 15,\n        \"samples\": [\n          \"Guidelines for confined space entry procedure\",\n          \"Guidelines for chemical handling procedure\",\n          \"Guidelines for confined space communication protocol\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"category\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"confined space\",\n          \"working at heights\",\n          \"chemical handling\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"steps\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"lastUpdated\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"num_unique_values\": 50,\n        \"samples\": [\n          \"2023-09-24T08:53:38.622112\",\n         
 \"2024-07-26T08:53:38.622395\",\n          \"2023-11-26T08:53:38.622288\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
              "type": "dataframe",
              "variable_name": "safety_df"
            },
            "text/html": [
              "\n",
              "  <div id=\"df-f6560c17-728f-4ea0-8878-020e03b537ca\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>procedureId</th>\n",
              "      <th>title</th>\n",
              "      <th>description</th>\n",
              "      <th>category</th>\n",
              "      <th>steps</th>\n",
              "      <th>lastUpdated</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>CONF-001</td>\n",
              "      <td>Confined Space Communication Protocol</td>\n",
              "      <td>Guidelines for confined space communication pr...</td>\n",
              "      <td>confined space</td>\n",
              "      <td>[{'description': 'Use appropriate PPE', 'stepN...</td>\n",
              "      <td>2024-01-13T08:53:38.621899</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>HEIGHTS-002</td>\n",
              "      <td>Scaffold Safety Procedure</td>\n",
              "      <td>Guidelines for scaffold safety procedure</td>\n",
              "      <td>working at heights</td>\n",
              "      <td>[{'description': 'Ensure fall protection gear ...</td>\n",
              "      <td>2023-12-26T08:53:38.621930</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>CHEM-003</td>\n",
              "      <td>Chemical Spill Response Procedure</td>\n",
              "      <td>Guidelines for chemical spill response procedure</td>\n",
              "      <td>chemical handling</td>\n",
              "      <td>[{'description': 'Use proper ventilation', 'st...</td>\n",
              "      <td>2024-04-27T08:53:38.621945</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>CONF-004</td>\n",
              "      <td>Advanced Confined Space Safety</td>\n",
              "      <td>Guidelines for advanced confined space safety</td>\n",
              "      <td>confined space</td>\n",
              "      <td>[{'description': 'Assess the confined space fo...</td>\n",
              "      <td>2024-03-31T08:53:38.621957</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>HEIGHTS-005</td>\n",
              "      <td>Fall Protection Procedure</td>\n",
              "      <td>Guidelines for fall protection procedure</td>\n",
              "      <td>working at heights</td>\n",
              "      <td>[{'description': 'Ensure fall protection gear ...</td>\n",
              "      <td>2024-07-29T08:53:38.621969</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f6560c17-728f-4ea0-8878-020e03b537ca')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-f6560c17-728f-4ea0-8878-020e03b537ca button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-f6560c17-728f-4ea0-8878-020e03b537ca');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-edf533b3-68bb-4784-8130-0fbfcca632a6\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-edf533b3-68bb-4784-8130-0fbfcca632a6')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-edf533b3-68bb-4784-8130-0fbfcca632a6 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "   procedureId                                  title  \\\n",
              "0     CONF-001  Confined Space Communication Protocol   \n",
              "1  HEIGHTS-002              Scaffold Safety Procedure   \n",
              "2     CHEM-003      Chemical Spill Response Procedure   \n",
              "3     CONF-004         Advanced Confined Space Safety   \n",
              "4  HEIGHTS-005              Fall Protection Procedure   \n",
              "\n",
              "                                         description            category  \\\n",
              "0  Guidelines for confined space communication pr...      confined space   \n",
              "1           Guidelines for scaffold safety procedure  working at heights   \n",
              "2   Guidelines for chemical spill response procedure   chemical handling   \n",
              "3      Guidelines for advanced confined space safety      confined space   \n",
              "4           Guidelines for fall protection procedure  working at heights   \n",
              "\n",
              "                                               steps  \\\n",
              "0  [{'description': 'Use appropriate PPE', 'stepN...   \n",
              "1  [{'description': 'Ensure fall protection gear ...   \n",
              "2  [{'description': 'Use proper ventilation', 'st...   \n",
              "3  [{'description': 'Assess the confined space fo...   \n",
              "4  [{'description': 'Ensure fall protection gear ...   \n",
              "\n",
              "                  lastUpdated  \n",
              "0  2024-01-13T08:53:38.621899  \n",
              "1  2023-12-26T08:53:38.621930  \n",
              "2  2024-04-27T08:53:38.621945  \n",
              "3  2024-03-31T08:53:38.621957  \n",
              "4  2024-07-29T08:53:38.621969  "
            ]
          },
          "execution_count": 18,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "safety_df.head()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 19,
      "metadata": {
        "id": "ZUKoRigjHQXq"
      },
      "outputs": [],
      "source": [
        "import numpy as np\n",
        "\n",
        "\n",
        "def combine_attributes(df, attributes):\n",
        "    \"\"\"\n",
        "    Combine specified attributes of a DataFrame into a single column,\n",
        "    converting all attributes to strings and handling various data types.\n",
        "\n",
        "    Parameters:\n",
        "    df (pandas.DataFrame): The input DataFrame\n",
        "    attributes (list): List of column names to combine\n",
        "\n",
        "    Returns:\n",
        "    pandas.DataFrame: The input DataFrame with an additional 'combined_info' column\n",
        "    \"\"\"\n",
        "\n",
        "    def combine_row(row):\n",
        "        combined = []\n",
        "        for attr in attributes:\n",
        "            if attr in row.index:\n",
        "                value = row[attr]\n",
        "                if isinstance(value, (pd.Series, np.ndarray, list)):\n",
        "                    # Handle array-like objects\n",
        "                    if len(value) > 0 and not pd.isna(value).all():\n",
        "                        combined.append(f\"{attr.capitalize()}: {value!s}\")\n",
        "                elif not pd.isna(value):\n",
        "                    combined.append(f\"{attr.capitalize()}: {value!s}\")\n",
        "        return \" \".join(combined)\n",
        "\n",
        "    df[\"combined_info\"] = df.apply(combine_row, axis=1)\n",
        "    return df"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 20,
      "metadata": {
        "id": "P3_VjnEXIiys"
      },
      "outputs": [],
      "source": [
        "accident_attributes_to_combine = [\n",
        "    \"type\",\n",
        "    \"description\",\n",
        "    \"immediateActions\",\n",
        "    \"rootCauses\",\n",
        "]\n",
        "accidents_df = combine_attributes(accidents_df, accident_attributes_to_combine)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 21,
      "metadata": {
        "id": "Fvld3nBGJyEh"
      },
      "outputs": [],
      "source": [
        "safety_procedures_attributes_to_combine = [\"title\", \"description\", \"category\", \"steps\"]\n",
        "safety_df = combine_attributes(safety_df, safety_procedures_attributes_to_combine)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 22,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Ptp9ytHYJOGj",
        "outputId": "e2f4631a-2f00-45a2-e6cb-2a51f8a6e670"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Type: Equipment Failure Description: Equipment Failure occurred at Factory B. Immediateactions: Contained spill and alerted hazardous material team Rootcauses: [{'category': 'procedural error', 'description': 'Inadequate safety checks', 'preventionRecommendations': 'Review and update safety procedures'}]\n"
          ]
        }
      ],
      "source": [
        "first_datapoint_accident = accidents_df.iloc[0]\n",
        "print(first_datapoint_accident[\"combined_info\"])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 23,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Wcsd_2PWJOdT",
        "outputId": "f2a04758-bed1-4d47-fcaa-f94487eb85d6"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Title: Confined Space Communication Protocol Description: Guidelines for confined space communication protocol Category: confined space Steps: [{'description': 'Use appropriate PPE', 'stepNumber': 1}, {'description': 'Assess the confined space for hazards', 'stepNumber': 2}, {'description': 'Obtain necessary permits', 'stepNumber': 3}, {'description': 'Monitor the atmosphere', 'stepNumber': 4}]\n"
          ]
        }
      ],
      "source": [
        "first_datapoint_safety = safety_df.iloc[0]\n",
        "print(first_datapoint_safety[\"combined_info\"])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 24,
      "metadata": {
        "id": "OgeecfjHKV0Y"
      },
      "outputs": [],
      "source": [
        "import tiktoken\n",
        "from langchain_openai import OpenAIEmbeddings\n",
        "from tqdm import tqdm\n",
        "\n",
        "MAX_TOKENS = 8191  # Maximum tokens for text-embedding-3-small\n",
        "OVERLAP = 50\n",
        "\n",
        "# Load the embedding model\n",
        "embedding_model = OpenAIEmbeddings(\n",
        "    model=OPEN_AI_EMBEDDING_MODEL, dimensions=OPEN_AI_EMBEDDING_MODEL_DIMENSION\n",
        ")\n",
        "\n",
        "\n",
        "def num_tokens_from_string(string: str, encoding_name: str = \"cl100k_base\") -> int:\n",
        "    \"\"\"Returns the number of tokens in a text string.\"\"\"\n",
        "    encoding = tiktoken.get_encoding(encoding_name)\n",
        "    num_tokens = len(encoding.encode(string))\n",
        "    return num_tokens\n",
        "\n",
        "\n",
        "def chunk_text(text, max_tokens=MAX_TOKENS, overlap=OVERLAP):\n",
        "    \"\"\"\n",
        "    Split the text into overlapping chunks based on token count.\n",
        "    \"\"\"\n",
        "    encoding = tiktoken.get_encoding(\"cl100k_base\")\n",
        "    tokens = encoding.encode(text)\n",
        "    chunks = []\n",
        "    for i in range(0, len(tokens), max_tokens - overlap):\n",
        "        chunk_tokens = tokens[i : i + max_tokens]\n",
        "        chunk = encoding.decode(chunk_tokens)\n",
        "        chunks.append(chunk)\n",
        "    return chunks\n",
        "\n",
        "\n",
        "def get_embedding(input_data, model=OPEN_AI_EMBEDDING_MODEL):\n",
        "    \"\"\"\n",
        "    Generate embeddings for the 'combined_attributes' column and duplicate the row for each chunk\n",
        "    or generate embeddings for a given string.\n",
        "    \"\"\"\n",
        "    if isinstance(input_data, str):\n",
        "        text = input_data\n",
        "    else:\n",
        "        text = input_data[\"combined_info\"]\n",
        "\n",
        "    if not text.strip():\n",
        "        print(\"Attempted to get embedding for empty text.\")\n",
        "        return []\n",
        "\n",
        "    # Split text into chunks if it's too long\n",
        "    chunks = chunk_text(text)\n",
        "\n",
        "    # Embed each chunk\n",
        "    chunk_embeddings = []\n",
        "    for chunk in chunks:\n",
        "        chunk = chunk.replace(\"\\n\", \" \")\n",
        "        embedding = embedding_model.embed_query(text=chunk)\n",
        "        chunk_embeddings.append(embedding)\n",
        "\n",
        "    if isinstance(input_data, str):\n",
        "        # Return list of embeddings for string input\n",
        "        return chunk_embeddings[0]\n",
        "    # Create duplicated rows for each chunk with the respective embedding for row input\n",
        "    duplicated_rows = []\n",
        "    for embedding in chunk_embeddings:\n",
        "        new_row = input_data.copy()\n",
        "        new_row[\"embedding\"] = embedding\n",
        "        duplicated_rows.append(new_row)\n",
        "    return duplicated_rows"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 25,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "I8jFwbX1K8x5",
        "outputId": "5bedf2b3-55a5-45c5-8091-51bd5d475a12"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Generating embeddings and duplicating rows: 100%|██████████| 100/100 [00:22<00:00,  4.52it/s]\n"
          ]
        }
      ],
      "source": [
        "# Apply the function and expand the dataset\n",
        "duplicated_data_accidents = []\n",
        "for _, row in tqdm(\n",
        "    accidents_df.iterrows(),\n",
        "    desc=\"Generating embeddings and duplicating rows\",\n",
        "    total=len(accidents_df),\n",
        "):\n",
        "    duplicated_rows = get_embedding(row)\n",
        "    duplicated_data_accidents.extend(duplicated_rows)\n",
        "\n",
        "# Create a new DataFrame from the duplicated data\n",
        "accidents_df = pd.DataFrame(duplicated_data_accidents)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 26,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "CkqA2-57K9VT",
        "outputId": "3a69f94c-89ba-4c2e-bcda-3e23a3c7ff1a"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Generating embeddings and duplicating rows: 100%|██████████| 50/50 [00:09<00:00,  5.48it/s]\n"
          ]
        }
      ],
      "source": [
        "# Apply the function and expand the dataset\n",
        "duplicated_data_safey = []\n",
        "for _, row in tqdm(\n",
        "    safety_df.iterrows(),\n",
        "    desc=\"Generating embeddings and duplicating rows\",\n",
        "    total=len(safety_df),\n",
        "):\n",
        "    duplicated_rows = get_embedding(row)\n",
        "    duplicated_data_safey.extend(duplicated_rows)\n",
        "\n",
        "# Create a new DataFrame from the duplicated data\n",
        "safety_df = pd.DataFrame(duplicated_data_safey)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 27,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 432
        },
        "id": "gkKH5troMJPB",
        "outputId": "c07d06a7-e0ec-4aca-a606-47f5b966ea7f"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "summary": "{\n  \"name\": \"accidents_df\",\n  \"rows\": 100,\n  \"fields\": [\n    {\n      \"column\": \"incidentId\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 100,\n        \"samples\": [\n          \"INC-2024-084\",\n          \"INC-2024-054\",\n          \"INC-2024-071\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"dateTime\",\n      \"properties\": {\n        \"dtype\": \"date\",\n        \"min\": \"2023-08-28 09:01:41.296111\",\n        \"max\": \"2024-08-20 09:01:41.295713\",\n        \"num_unique_values\": 100,\n        \"samples\": [\n          \"2024-03-15 09:01:41.296977\",\n          \"2023-09-02 09:01:41.296372\",\n          \"2024-05-18 09:01:41.296740\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"location\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"type\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 5,\n        \"samples\": [\n          \"Fire Hazard\",\n          \"Height-Related Fall\",\n          \"Confined Space Incident\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"description\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 20,\n        \"samples\": [\n          \"Equipment Failure occurred at Factory B.\",\n          \"Height-Related Fall occurred at Factory B.\",\n          \"Chemical Spill occurred at Factory A.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"severityLevel\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        
\"num_unique_values\": 3,\n        \"samples\": [\n          \"low\",\n          \"high\",\n          \"medium\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"relatedProcedures\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"immediateActions\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          \"Shut down equipment and isolated area\",\n          \"Evacuated area and provided first aid\",\n          \"Contained spill and alerted hazardous material team\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"rootCauses\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"combined_info\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 100,\n        \"samples\": [\n          \"Type: Confined Space Incident Description: Confined Space Incident occurred at Warehouse C. Immediateactions: Evacuated area and provided first aid Rootcauses: [{'category': 'environmental factors', 'description': 'Equipment malfunctioned during operation', 'preventionRecommendations': 'Enhance equipment maintenance protocols'}, {'category': 'environmental factors', 'description': 'Equipment malfunctioned during operation', 'preventionRecommendations': 'Enhance equipment maintenance protocols'}]\",\n          \"Type: Chemical Spill Description: Chemical Spill occurred at Factory B. 
Immediateactions: Evacuated area and provided first aid Rootcauses: [{'category': 'environmental factors', 'description': 'Procedural step missed by worker', 'preventionRecommendations': 'Enhance equipment maintenance protocols'}, {'category': 'equipment failure', 'description': 'Equipment malfunctioned during operation', 'preventionRecommendations': 'Enhance equipment maintenance protocols'}, {'category': 'environmental factors', 'description': 'Equipment malfunctioned during operation', 'preventionRecommendations': 'Enhance equipment maintenance protocols'}]\",\n          \"Type: Chemical Spill Description: Chemical Spill occurred at Plant D. Immediateactions: Ventilated space and removed worker Rootcauses: [{'category': 'human error', 'description': 'Environmental hazard not identified', 'preventionRecommendations': 'Review and update safety procedures'}]\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"embedding\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
              "type": "dataframe",
              "variable_name": "accidents_df"
            },
            "text/html": [
              "\n",
              "  <div id=\"df-9a12b4f8-9920-402c-8838-baeb24779f5f\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>incidentId</th>\n",
              "      <th>dateTime</th>\n",
              "      <th>location</th>\n",
              "      <th>type</th>\n",
              "      <th>description</th>\n",
              "      <th>severityLevel</th>\n",
              "      <th>relatedProcedures</th>\n",
              "      <th>immediateActions</th>\n",
              "      <th>rootCauses</th>\n",
              "      <th>combined_info</th>\n",
              "      <th>embedding</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>INC-2024-001</td>\n",
              "      <td>2024-03-08 09:01:41.295149</td>\n",
              "      <td>{'region': 'East', 'site': 'Factory B'}</td>\n",
              "      <td>Equipment Failure</td>\n",
              "      <td>Equipment Failure occurred at Factory B.</td>\n",
              "      <td>low</td>\n",
              "      <td>[CHEM-012]</td>\n",
              "      <td>Contained spill and alerted hazardous material...</td>\n",
              "      <td>[{'category': 'procedural error', 'description...</td>\n",
              "      <td>Type: Equipment Failure Description: Equipment...</td>\n",
              "      <td>[-0.04604925215244293, 0.12573133409023285, 0....</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>INC-2024-002</td>\n",
              "      <td>2024-02-05 09:01:41.295225</td>\n",
              "      <td>{'region': 'East', 'site': 'Warehouse C'}</td>\n",
              "      <td>Fire Hazard</td>\n",
              "      <td>Fire Hazard occurred at Warehouse C.</td>\n",
              "      <td>high</td>\n",
              "      <td>[CHEM-021, CONF-001]</td>\n",
              "      <td>Shut down equipment and isolated area</td>\n",
              "      <td>[{'category': 'procedural error', 'description...</td>\n",
              "      <td>Type: Fire Hazard Description: Fire Hazard occ...</td>\n",
              "      <td>[-0.04193640872836113, 0.05664677545428276, 0....</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>INC-2024-003</td>\n",
              "      <td>2024-04-26 09:01:41.295263</td>\n",
              "      <td>{'region': 'West', 'site': 'Plant D'}</td>\n",
              "      <td>Confined Space Incident</td>\n",
              "      <td>Confined Space Incident occurred at Plant D.</td>\n",
              "      <td>low</td>\n",
              "      <td>[CONF-031, CONF-028, CHEM-021]</td>\n",
              "      <td>Ventilated space and removed worker</td>\n",
              "      <td>[{'category': 'environmental factors', 'descri...</td>\n",
              "      <td>Type: Confined Space Incident Description: Con...</td>\n",
              "      <td>[-0.0865219384431839, 0.0783221423625946, 0.11...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>INC-2024-004</td>\n",
              "      <td>2024-04-29 09:01:41.295283</td>\n",
              "      <td>{'region': 'North', 'site': 'Warehouse C'}</td>\n",
              "      <td>Equipment Failure</td>\n",
              "      <td>Equipment Failure occurred at Warehouse C.</td>\n",
              "      <td>high</td>\n",
              "      <td>[CONF-046, CONF-049]</td>\n",
              "      <td>Contained spill and alerted hazardous material...</td>\n",
              "      <td>[{'category': 'procedural error', 'description...</td>\n",
              "      <td>Type: Equipment Failure Description: Equipment...</td>\n",
              "      <td>[-0.022067412734031677, 0.09491231292486191, 0...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>INC-2024-005</td>\n",
              "      <td>2024-05-16 09:01:41.295300</td>\n",
              "      <td>{'region': 'West', 'site': 'Warehouse C'}</td>\n",
              "      <td>Fire Hazard</td>\n",
              "      <td>Fire Hazard occurred at Warehouse C.</td>\n",
              "      <td>high</td>\n",
              "      <td>[CONF-043, HEIGHTS-020, CONF-007]</td>\n",
              "      <td>Contained spill and alerted hazardous material...</td>\n",
              "      <td>[{'category': 'equipment failure', 'descriptio...</td>\n",
              "      <td>Type: Fire Hazard Description: Fire Hazard occ...</td>\n",
              "      <td>[-0.021989304572343826, 0.046285584568977356, ...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-9a12b4f8-9920-402c-8838-baeb24779f5f')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-9a12b4f8-9920-402c-8838-baeb24779f5f button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-9a12b4f8-9920-402c-8838-baeb24779f5f');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-3ec35ab7-6efc-4acd-b304-0f46bb1697cb\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-3ec35ab7-6efc-4acd-b304-0f46bb1697cb')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-3ec35ab7-6efc-4acd-b304-0f46bb1697cb button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "     incidentId                   dateTime  \\\n",
              "0  INC-2024-001 2024-03-08 09:01:41.295149   \n",
              "1  INC-2024-002 2024-02-05 09:01:41.295225   \n",
              "2  INC-2024-003 2024-04-26 09:01:41.295263   \n",
              "3  INC-2024-004 2024-04-29 09:01:41.295283   \n",
              "4  INC-2024-005 2024-05-16 09:01:41.295300   \n",
              "\n",
              "                                     location                     type  \\\n",
              "0     {'region': 'East', 'site': 'Factory B'}        Equipment Failure   \n",
              "1   {'region': 'East', 'site': 'Warehouse C'}              Fire Hazard   \n",
              "2       {'region': 'West', 'site': 'Plant D'}  Confined Space Incident   \n",
              "3  {'region': 'North', 'site': 'Warehouse C'}        Equipment Failure   \n",
              "4   {'region': 'West', 'site': 'Warehouse C'}              Fire Hazard   \n",
              "\n",
              "                                    description severityLevel  \\\n",
              "0      Equipment Failure occurred at Factory B.           low   \n",
              "1          Fire Hazard occurred at Warehouse C.          high   \n",
              "2  Confined Space Incident occurred at Plant D.           low   \n",
              "3    Equipment Failure occurred at Warehouse C.          high   \n",
              "4          Fire Hazard occurred at Warehouse C.          high   \n",
              "\n",
              "                   relatedProcedures  \\\n",
              "0                         [CHEM-012]   \n",
              "1               [CHEM-021, CONF-001]   \n",
              "2     [CONF-031, CONF-028, CHEM-021]   \n",
              "3               [CONF-046, CONF-049]   \n",
              "4  [CONF-043, HEIGHTS-020, CONF-007]   \n",
              "\n",
              "                                    immediateActions  \\\n",
              "0  Contained spill and alerted hazardous material...   \n",
              "1              Shut down equipment and isolated area   \n",
              "2                Ventilated space and removed worker   \n",
              "3  Contained spill and alerted hazardous material...   \n",
              "4  Contained spill and alerted hazardous material...   \n",
              "\n",
              "                                          rootCauses  \\\n",
              "0  [{'category': 'procedural error', 'description...   \n",
              "1  [{'category': 'procedural error', 'description...   \n",
              "2  [{'category': 'environmental factors', 'descri...   \n",
              "3  [{'category': 'procedural error', 'description...   \n",
              "4  [{'category': 'equipment failure', 'descriptio...   \n",
              "\n",
              "                                       combined_info  \\\n",
              "0  Type: Equipment Failure Description: Equipment...   \n",
              "1  Type: Fire Hazard Description: Fire Hazard occ...   \n",
              "2  Type: Confined Space Incident Description: Con...   \n",
              "3  Type: Equipment Failure Description: Equipment...   \n",
              "4  Type: Fire Hazard Description: Fire Hazard occ...   \n",
              "\n",
              "                                           embedding  \n",
              "0  [-0.04604925215244293, 0.12573133409023285, 0....  \n",
              "1  [-0.04193640872836113, 0.05664677545428276, 0....  \n",
              "2  [-0.0865219384431839, 0.0783221423625946, 0.11...  \n",
              "3  [-0.022067412734031677, 0.09491231292486191, 0...  \n",
              "4  [-0.021989304572343826, 0.046285584568977356, ...  "
            ]
          },
          "execution_count": 27,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "accidents_df.head()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 28,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 293
        },
        "id": "GYtaBOgNMMwB",
        "outputId": "2a6ce7cf-ef22-4053-8071-a7cc6baf0b82"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "summary": "{\n  \"name\": \"safety_df\",\n  \"rows\": 50,\n  \"fields\": [\n    {\n      \"column\": \"procedureId\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 50,\n        \"samples\": [\n          \"HEIGHTS-014\",\n          \"CONF-040\",\n          \"CONF-031\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"title\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 15,\n        \"samples\": [\n          \"Confined Space Entry Procedure\",\n          \"Chemical Handling Procedure\",\n          \"Confined Space Communication Protocol\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"description\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 15,\n        \"samples\": [\n          \"Guidelines for confined space entry procedure\",\n          \"Guidelines for chemical handling procedure\",\n          \"Guidelines for confined space communication protocol\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"category\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"confined space\",\n          \"working at heights\",\n          \"chemical handling\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"steps\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"lastUpdated\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"num_unique_values\": 50,\n        \"samples\": [\n          \"2023-09-24T08:53:38.622112\",\n         
 \"2024-07-26T08:53:38.622395\",\n          \"2023-11-26T08:53:38.622288\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"combined_info\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 48,\n        \"samples\": [\n          \"Title: Confined Space Communication Protocol Description: Guidelines for confined space communication protocol Category: confined space Steps: [{'description': 'Assess the confined space for hazards', 'stepNumber': 1}, {'description': 'Use appropriate PPE', 'stepNumber': 2}, {'description': 'Ensure communication with outside personnel', 'stepNumber': 3}]\",\n          \"Title: Advanced Confined Space Safety Description: Guidelines for advanced confined space safety Category: confined space Steps: [{'description': 'Use appropriate PPE', 'stepNumber': 1}, {'description': 'Ensure communication with outside personnel', 'stepNumber': 2}, {'description': 'Assess the confined space for hazards', 'stepNumber': 3}]\",\n          \"Title: Chemical Spill Response Procedure Description: Guidelines for chemical spill response procedure Category: chemical handling Steps: [{'description': 'Label and store chemicals safely', 'stepNumber': 1}, {'description': 'Wear appropriate chemical-resistant PPE', 'stepNumber': 2}, {'description': 'Use proper ventilation', 'stepNumber': 3}]\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"embedding\",\n      \"properties\": {\n        \"dtype\": \"object\",\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
              "type": "dataframe",
              "variable_name": "safety_df"
            },
            "text/html": [
              "\n",
              "  <div id=\"df-163fab67-7530-40a6-8359-4df21537a530\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>procedureId</th>\n",
              "      <th>title</th>\n",
              "      <th>description</th>\n",
              "      <th>category</th>\n",
              "      <th>steps</th>\n",
              "      <th>lastUpdated</th>\n",
              "      <th>combined_info</th>\n",
              "      <th>embedding</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>CONF-001</td>\n",
              "      <td>Confined Space Communication Protocol</td>\n",
              "      <td>Guidelines for confined space communication pr...</td>\n",
              "      <td>confined space</td>\n",
              "      <td>[{'description': 'Use appropriate PPE', 'stepN...</td>\n",
              "      <td>2024-01-13T08:53:38.621899</td>\n",
              "      <td>Title: Confined Space Communication Protocol D...</td>\n",
              "      <td>[0.009534717537462711, 0.06708501279354095, 0....</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>HEIGHTS-002</td>\n",
              "      <td>Scaffold Safety Procedure</td>\n",
              "      <td>Guidelines for scaffold safety procedure</td>\n",
              "      <td>working at heights</td>\n",
              "      <td>[{'description': 'Ensure fall protection gear ...</td>\n",
              "      <td>2023-12-26T08:53:38.621930</td>\n",
              "      <td>Title: Scaffold Safety Procedure Description: ...</td>\n",
              "      <td>[-0.0013834232231602073, 0.08337806910276413, ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>CHEM-003</td>\n",
              "      <td>Chemical Spill Response Procedure</td>\n",
              "      <td>Guidelines for chemical spill response procedure</td>\n",
              "      <td>chemical handling</td>\n",
              "      <td>[{'description': 'Use proper ventilation', 'st...</td>\n",
              "      <td>2024-04-27T08:53:38.621945</td>\n",
              "      <td>Title: Chemical Spill Response Procedure Descr...</td>\n",
              "      <td>[-0.06862455606460571, 0.07193397730588913, 0....</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>CONF-004</td>\n",
              "      <td>Advanced Confined Space Safety</td>\n",
              "      <td>Guidelines for advanced confined space safety</td>\n",
              "      <td>confined space</td>\n",
              "      <td>[{'description': 'Assess the confined space fo...</td>\n",
              "      <td>2024-03-31T08:53:38.621957</td>\n",
              "      <td>Title: Advanced Confined Space Safety Descript...</td>\n",
              "      <td>[-0.01785854995250702, 0.08748620748519897, 0....</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>HEIGHTS-005</td>\n",
              "      <td>Fall Protection Procedure</td>\n",
              "      <td>Guidelines for fall protection procedure</td>\n",
              "      <td>working at heights</td>\n",
              "      <td>[{'description': 'Ensure fall protection gear ...</td>\n",
              "      <td>2024-07-29T08:53:38.621969</td>\n",
              "      <td>Title: Fall Protection Procedure Description: ...</td>\n",
              "      <td>[-0.09375722706317902, 0.09517853707075119, 0....</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-163fab67-7530-40a6-8359-4df21537a530')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-163fab67-7530-40a6-8359-4df21537a530 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-163fab67-7530-40a6-8359-4df21537a530');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-92be1452-915f-4588-8f06-d6580acd42d1\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-92be1452-915f-4588-8f06-d6580acd42d1')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-92be1452-915f-4588-8f06-d6580acd42d1 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "   procedureId                                  title  \\\n",
              "0     CONF-001  Confined Space Communication Protocol   \n",
              "1  HEIGHTS-002              Scaffold Safety Procedure   \n",
              "2     CHEM-003      Chemical Spill Response Procedure   \n",
              "3     CONF-004         Advanced Confined Space Safety   \n",
              "4  HEIGHTS-005              Fall Protection Procedure   \n",
              "\n",
              "                                         description            category  \\\n",
              "0  Guidelines for confined space communication pr...      confined space   \n",
              "1           Guidelines for scaffold safety procedure  working at heights   \n",
              "2   Guidelines for chemical spill response procedure   chemical handling   \n",
              "3      Guidelines for advanced confined space safety      confined space   \n",
              "4           Guidelines for fall protection procedure  working at heights   \n",
              "\n",
              "                                               steps  \\\n",
              "0  [{'description': 'Use appropriate PPE', 'stepN...   \n",
              "1  [{'description': 'Ensure fall protection gear ...   \n",
              "2  [{'description': 'Use proper ventilation', 'st...   \n",
              "3  [{'description': 'Assess the confined space fo...   \n",
              "4  [{'description': 'Ensure fall protection gear ...   \n",
              "\n",
              "                  lastUpdated  \\\n",
              "0  2024-01-13T08:53:38.621899   \n",
              "1  2023-12-26T08:53:38.621930   \n",
              "2  2024-04-27T08:53:38.621945   \n",
              "3  2024-03-31T08:53:38.621957   \n",
              "4  2024-07-29T08:53:38.621969   \n",
              "\n",
              "                                       combined_info  \\\n",
              "0  Title: Confined Space Communication Protocol D...   \n",
              "1  Title: Scaffold Safety Procedure Description: ...   \n",
              "2  Title: Chemical Spill Response Procedure Descr...   \n",
              "3  Title: Advanced Confined Space Safety Descript...   \n",
              "4  Title: Fall Protection Procedure Description: ...   \n",
              "\n",
              "                                           embedding  \n",
              "0  [0.009534717537462711, 0.06708501279354095, 0....  \n",
              "1  [-0.0013834232231602073, 0.08337806910276413, ...  \n",
              "2  [-0.06862455606460571, 0.07193397730588913, 0....  \n",
              "3  [-0.01785854995250702, 0.08748620748519897, 0....  \n",
              "4  [-0.09375722706317902, 0.09517853707075119, 0....  "
            ]
          },
          "execution_count": 28,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "safety_df.head()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 29,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "MOG6f76wMPd7",
        "outputId": "2c678121-74d3-473f-fc65-d6f7c5041d29"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Enter your MongoDB URI: ··········\n"
          ]
        }
      ],
      "source": [
        "set_env_securely(\"MONGO_URI\", \"Enter your MongoDB URI: \")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 30,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "oBaGJKEQMYWv",
        "outputId": "ae43d0d6-91c8-406e-c469-5a77b46b14cb"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Connection to MongoDB successful\n"
          ]
        }
      ],
      "source": [
        "import pymongo\n",
        "\n",
        "\n",
        "def get_mongo_client(mongo_uri):\n",
        "    \"\"\"\n",
        "    Open a MongoDB connection and confirm it with a ping.\n",
        "\n",
        "    Args:\n",
        "    mongo_uri: Connection string passed to pymongo.MongoClient\n",
        "\n",
        "    Returns:\n",
        "    The client when the ping reply reports ok == 1.0, otherwise None.\n",
        "    \"\"\"\n",
        "    mongo_connection = pymongo.MongoClient(\n",
        "        mongo_uri, appname=\"devrel.showcase.factory_safety_assistant.python\"\n",
        "    )\n",
        "\n",
        "    # Ping the server and bail out early unless it acknowledges with ok == 1.0\n",
        "    ping_acknowledged = mongo_connection.admin.command(\"ping\").get(\"ok\") == 1.0\n",
        "    if not ping_acknowledged:\n",
        "        print(\"Connection to MongoDB failed\")\n",
        "        return None\n",
        "\n",
        "    print(\"Connection to MongoDB successful\")\n",
        "    return mongo_connection\n",
        "\n",
        "\n",
        "# .get() avoids a bare KeyError when the variable was never set\n",
        "MONGO_URI = os.environ.get(\"MONGO_URI\")\n",
        "\n",
        "# Fail fast instead of handing a missing or empty URI to the driver\n",
        "if not MONGO_URI:\n",
        "    raise ValueError(\"MONGO_URI not set in environment variables\")\n",
        "\n",
        "mongo_client = get_mongo_client(MONGO_URI)\n",
        "\n",
        "# get_mongo_client returns None when the ping is not acknowledged; stop here\n",
        "# rather than failing below with an AttributeError on None\n",
        "if mongo_client is None:\n",
        "    raise ConnectionError(\"Connection to MongoDB failed; check MONGO_URI\")\n",
        "\n",
        "DB_NAME = \"factory_safety_use_case\"\n",
        "SAFETY_PROCEDURES_COLLECTION = \"safety_procedures\"\n",
        "ACCIDENTS_REPORT_COLLECTION = \"accident_report\"\n",
        "\n",
        "# Handles to the database and the two collections used by the rest of the notebook\n",
        "db = mongo_client.get_database(DB_NAME)\n",
        "safety_procedure_collection = db.get_collection(SAFETY_PROCEDURES_COLLECTION)\n",
        "accident_report_collection = db.get_collection(ACCIDENTS_REPORT_COLLECTION)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 31,
      "metadata": {
        "id": "Z8yqiFJRUiO-"
      },
      "outputs": [],
      "source": [
        "# Programmatically create vector search index for both collections\n",
        "from pymongo.operations import SearchIndexModel\n",
        "\n",
        "\n",
        "def setup_vector_search_index_with_filter(\n",
        "    collection, index_definition, index_name=\"vector_index_with_filter\"\n",
        "):\n",
        "    \"\"\"\n",
        "    Setup a vector search index for a MongoDB collection.\n",
        "\n",
        "    Safe to re-run: when an index called `index_name` already exists on the\n",
        "    collection, nothing is created and no error is reported.\n",
        "\n",
        "    Args:\n",
        "    collection: MongoDB collection object\n",
        "    index_definition: Dictionary containing the index definition\n",
        "    index_name: Name of the index (default: \"vector_index_with_filter\")\n",
        "\n",
        "    Returns:\n",
        "    None. Progress and errors are reported with print().\n",
        "    \"\"\"\n",
        "    try:\n",
        "        # Skip creation when the index is already there, so re-running the\n",
        "        # notebook does not end in a \"Duplicate Index\" error\n",
        "        if any(True for _ in collection.list_search_indexes(index_name)):\n",
        "            print(f\"Index '{index_name}' already exists, skipping creation.\")\n",
        "            return\n",
        "\n",
        "        new_vector_search_index_model = SearchIndexModel(\n",
        "            definition=index_definition,\n",
        "            name=index_name,\n",
        "        )\n",
        "\n",
        "        # Announce the attempt before issuing the request, not after it\n",
        "        print(f\"Creating index '{index_name}'...\")\n",
        "        result = collection.create_search_index(model=new_vector_search_index_model)\n",
        "        print(f\"New index '{index_name}' created successfully:\", result)\n",
        "    except Exception as e:\n",
        "        # Best-effort: report the problem and let the notebook continue\n",
        "        print(f\"Error creating new vector search index '{index_name}': {e!s}\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 32,
      "metadata": {
        "id": "KSYfS6q_VgF5"
      },
      "outputs": [],
      "source": [
        "def _knn_index_definition(id_field):\n",
        "    \"\"\"Build an index definition: 256-dim cosine knnVector on `embedding` plus a string `id_field`.\"\"\"\n",
        "    return {\n",
        "        \"mappings\": {\n",
        "            \"dynamic\": True,\n",
        "            \"fields\": {\n",
        "                \"embedding\": {\n",
        "                    \"dimensions\": 256,\n",
        "                    \"similarity\": \"cosine\",\n",
        "                    \"type\": \"knnVector\",\n",
        "                },\n",
        "                id_field: {\"type\": \"string\"},\n",
        "            },\n",
        "        }\n",
        "    }\n",
        "\n",
        "\n",
        "# Both collections share the same mapping; only the string ID field differs\n",
        "vector_search_index_definition_safety_procedure = _knn_index_definition(\"procedureId\")\n",
        "vector_search_index_definition_accident_reports = _knn_index_definition(\"incidentId\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 33,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "94eeHufiWh2h",
        "outputId": "e108c880-d9bb-4743-95fd-82a4629f2a1a"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Error creating new vector search index 'vector_index_with_filter': Duplicate Index, full error: {'ok': 0.0, 'errmsg': 'Duplicate Index', 'code': 68, 'codeName': 'IndexAlreadyExists', '$clusterTime': {'clusterTime': Timestamp(1724932771, 48), 'signature': {'hash': b'\\xf1\\xee\\x04\\xa0w:\\xb7{)\\xf6\\xbc\\xc2\\x103i\\xebcv\\xaet', 'keyId': 7353740577831124994}}, 'operationTime': Timestamp(1724932771, 48)}\n",
            "Error creating new vector search index 'vector_index_with_filter': Duplicate Index, full error: {'ok': 0.0, 'errmsg': 'Duplicate Index', 'code': 68, 'codeName': 'IndexAlreadyExists', '$clusterTime': {'clusterTime': Timestamp(1724932771, 48), 'signature': {'hash': b'\\xf1\\xee\\x04\\xa0w:\\xb7{)\\xf6\\xbc\\xc2\\x103i\\xebcv\\xaet', 'keyId': 7353740577831124994}}, 'operationTime': Timestamp(1724932771, 48)}\n"
          ]
        }
      ],
      "source": [
        "setup_vector_search_index_with_filter(\n",
        "    safety_procedure_collection, vector_search_index_definition_safety_procedure\n",
        ")\n",
        "setup_vector_search_index_with_filter(\n",
        "    accident_report_collection, vector_search_index_definition_accident_reports\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 34,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "mbD0xFx9M5Oc",
        "outputId": "cde94386-eee7-4ad2-d7ee-f47d2af8093f"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "DeleteResult({'n': 100, 'electionId': ObjectId('7fffffff0000000000000032'), 'opTime': {'ts': Timestamp(1724932786, 150), 't': 50}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1724932786, 150), 'signature': {'hash': b'\\xa1^\\xb7L\\xba\\xe1vp\\xedVF~\\xb5\\xbb\\xde\\xb6\\xa2\\xe3\\xe6-', 'keyId': 7353740577831124994}}, 'operationTime': Timestamp(1724932786, 150)}, acknowledged=True)"
            ]
          },
          "execution_count": 34,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Delete any existing records in the collections\n",
        "safety_procedure_collection.delete_many({})\n",
        "accident_report_collection.delete_many({})"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 35,
      "metadata": {
        "id": "i6gyle3NP2rQ"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "from pymongo.errors import BulkWriteError\n",
        "\n",
        "\n",
        "def insert_df_to_mongodb(df, collection, batch_size=1000):\n",
        "    \"\"\"\n",
        "    Write every row of a pandas DataFrame to a MongoDB collection in batches.\n",
        "\n",
        "    Parameters:\n",
        "    df (pandas.DataFrame): Source rows; each row becomes one document\n",
        "    collection (pymongo.collection.Collection): Destination collection\n",
        "    batch_size (int): Maximum number of documents per insert_many call\n",
        "\n",
        "    Returns:\n",
        "    int: Count of documents reported as inserted across all batches\n",
        "    \"\"\"\n",
        "    documents = df.to_dict(\"records\")\n",
        "    inserted_so_far = 0\n",
        "\n",
        "    # Walk the documents in fixed-size slices, numbering the batches from 1\n",
        "    batch_starts = range(0, len(documents), batch_size)\n",
        "    for batch_number, start in enumerate(batch_starts, start=1):\n",
        "        chunk = documents[start : start + batch_size]\n",
        "        try:\n",
        "            outcome = collection.insert_many(chunk, ordered=False)\n",
        "        except BulkWriteError as bwe:\n",
        "            # Part of the batch may still have been written; count what made it in\n",
        "            inserted_count = bwe.details[\"nInserted\"]\n",
        "            failed_count = len(bwe.details[\"writeErrors\"])\n",
        "            inserted_so_far += inserted_count\n",
        "            print(\n",
        "                f\"Batch {batch_number} partially inserted. {inserted_count} inserted, {failed_count} failed.\"\n",
        "            )\n",
        "        else:\n",
        "            inserted_count = len(outcome.inserted_ids)\n",
        "            inserted_so_far += inserted_count\n",
        "            print(f\"Inserted batch {batch_number}: {inserted_count} documents\")\n",
        "\n",
        "    return inserted_so_far"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 36,
      "metadata": {
        "id": "W-Njsy53Ti8J"
      },
      "outputs": [],
      "source": [
        "def print_dataframe_info(df, df_name):\n",
        "    \"\"\"\n",
        "    Print a diagnostic summary of a DataFrame: its info() report and first rows.\n",
        "\n",
        "    Args:\n",
        "    df (pandas.DataFrame): The DataFrame to describe\n",
        "    df_name (str): Label used in the printed headings\n",
        "    \"\"\"\n",
        "    print(f\"\\n{df_name} DataFrame info:\")\n",
        "    # info() writes its report to stdout itself and returns None, so wrapping\n",
        "    # it in print() would add a stray \"None\" line\n",
        "    df.info()\n",
        "    print(f\"\\nFirst few rows of the {df_name} DataFrame:\")\n",
        "    print(df.head())"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 37,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ewXKT0U7M_A7",
        "outputId": "1f2802f6-cccf-4478-df12-23c47bef0c63"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Inserted batch 1: 50 documents\n",
            "Safety procedures data ingestion completed. Total documents inserted: 50\n",
            "Inserted batch 1: 100 documents\n",
            "Accident reports data ingestion completed. Total documents inserted: 100\n",
            "\n",
            "Insertion Summary:\n",
            "Safety Procedures inserted: 50\n",
            "Accident Reports inserted: 100\n"
          ]
        }
      ],
      "source": [
        "# Start from None so a failed re-run cannot report a count left over from an\n",
        "# earlier execution of this cell\n",
        "total_inserted_safety = None\n",
        "total_inserted_accidents = None\n",
        "\n",
        "# Insert safety procedures\n",
        "try:\n",
        "    total_inserted_safety = insert_df_to_mongodb(safety_df, safety_procedure_collection)\n",
        "    print(\n",
        "        f\"Safety procedures data ingestion completed. Total documents inserted: {total_inserted_safety}\"\n",
        "    )\n",
        "except Exception as e:\n",
        "    # Report the failure with some diagnostics, then carry on to the next dataset\n",
        "    print(f\"An error occurred while inserting safety procedures: {e}\")\n",
        "    print(\"Pandas version:\", pd.__version__)\n",
        "    print_dataframe_info(safety_df, \"Safety Procedures\")\n",
        "\n",
        "# Insert accident reports\n",
        "try:\n",
        "    total_inserted_accidents = insert_df_to_mongodb(\n",
        "        accidents_df, accident_report_collection\n",
        "    )\n",
        "    print(\n",
        "        f\"Accident reports data ingestion completed. Total documents inserted: {total_inserted_accidents}\"\n",
        "    )\n",
        "except Exception as e:\n",
        "    print(f\"An error occurred while inserting accident reports: {e}\")\n",
        "    print(\"Pandas version:\", pd.__version__)\n",
        "    print_dataframe_info(accidents_df, \"Accident Reports\")\n",
        "\n",
        "# Final summary: a counter that is still None means that insertion raised\n",
        "print(\"\\nInsertion Summary:\")\n",
        "print(\n",
        "    f\"Safety Procedures inserted: {'Failed' if total_inserted_safety is None else total_inserted_safety}\"\n",
        ")\n",
        "print(\n",
        "    f\"Accident Reports inserted: {'Failed' if total_inserted_accidents is None else total_inserted_accidents}\"\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 38,
      "metadata": {
        "id": "2WMIykXiSRgz"
      },
      "outputs": [],
      "source": [
        "def vector_search(\n",
        "    user_query,\n",
        "    collection,\n",
        "    index_name=\"vector_index_with_filter\",\n",
        "    num_candidates=150,\n",
        "    limit=5,\n",
        "):\n",
        "    \"\"\"\n",
        "    Perform a vector search in the MongoDB collection based on the user query.\n",
        "\n",
        "    Args:\n",
        "    user_query (str): The user's query string.\n",
        "    collection (MongoCollection): The MongoDB collection to search.\n",
        "    index_name (str): Name of the search index to query.\n",
        "    num_candidates (int): Number of candidate matches to consider.\n",
        "    limit (int): Maximum number of matches to return.\n",
        "\n",
        "    Returns:\n",
        "    list: Matching documents, each holding only `combined_info` and `score`.\n",
        "    str: An error message when no embedding could be generated for the query\n",
        "    (kept for backward compatibility; callers should check for it).\n",
        "    \"\"\"\n",
        "\n",
        "    # Generate embedding for the user query\n",
        "    query_embedding = get_embedding(user_query)\n",
        "\n",
        "    if query_embedding is None:\n",
        "        return \"Invalid query or embedding generation failed.\"\n",
        "\n",
        "    # Define the vector search pipeline\n",
        "    vector_search_stage = {\n",
        "        \"$vectorSearch\": {\n",
        "            \"index\": index_name,\n",
        "            \"queryVector\": query_embedding,\n",
        "            \"path\": \"embedding\",\n",
        "            \"numCandidates\": num_candidates,  # Number of candidate matches to consider\n",
        "            \"limit\": limit,  # Return at most `limit` matches\n",
        "        }\n",
        "    }\n",
        "\n",
        "    # The inclusion projection keeps only the listed fields, so 'embedding' is\n",
        "    # already dropped here and no separate $unset stage is needed\n",
        "    project_stage = {\n",
        "        \"$project\": {\n",
        "            \"_id\": 0,  # Exclude the _id field\n",
        "            \"combined_info\": 1,\n",
        "            \"score\": {\n",
        "                \"$meta\": \"vectorSearchScore\"  # Include the search score\n",
        "            },\n",
        "        }\n",
        "    }\n",
        "\n",
        "    pipeline = [vector_search_stage, project_stage]\n",
        "\n",
        "    # Execute the search\n",
        "    results = collection.aggregate(pipeline)\n",
        "    return list(results)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 39,
      "metadata": {
        "id": "Gdgp93nlW05Q"
      },
      "outputs": [],
      "source": [
        "def get_vector_search_result(query, collection):\n",
        "    \"\"\"\n",
        "    Run a vector search and reshape the hits into [score, combined_info] rows.\n",
        "\n",
        "    Args:\n",
        "    query (str): The user's query string.\n",
        "    collection: The MongoDB collection to search.\n",
        "\n",
        "    Returns:\n",
        "    list: One [score, combined_info] pair per hit (\"N/A\" for a missing field);\n",
        "    empty when the search could not be run.\n",
        "    \"\"\"\n",
        "    get_knowledge = vector_search(query, collection)\n",
        "\n",
        "    # vector_search reports an embedding failure as a plain string; iterating\n",
        "    # over it would yield characters and crash on .get()\n",
        "    if isinstance(get_knowledge, str):\n",
        "        print(get_knowledge)\n",
        "        return []\n",
        "\n",
        "    return [\n",
        "        [result.get(\"score\", \"N/A\"), result.get(\"combined_info\", \"N/A\")]\n",
        "        for result in get_knowledge\n",
        "    ]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 40,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "2YIZDMGgXLJD",
        "outputId": "a2faed87-709e-4e14-9423-af59ab6abc7c"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Query: Get me a saftey procedure related to helmet incidents\n",
            "\n",
            "Continue to answer the query by using the Search Results:\n",
            "\n",
            "+--------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n",
            "|   Similarity Score | Combined Information                                                                                                                                                                                                                                                                                                                                                                                                                                                                |\n",
            "+====================+=====================================================================================================================================================================================================================================================================================================================================================================================================================================================================================+\n",
            "|           0.822171 | Title: Scaffold Safety Procedure Description: Guidelines for scaffold safety procedure Category: working at heights Steps: [{'description': 'Ensure fall protection gear is worn', 'stepNumber': 1}, {'description': 'Maintain three points of contact', 'stepNumber': 2}, {'description': 'Identify potential hazards', 'stepNumber': 3}, {'description': 'Check equipment and anchor points', 'stepNumber': 4}]                                                                   |\n",
            "+--------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n",
            "|           0.821077 | Title: Scaffold Safety Procedure Description: Guidelines for scaffold safety procedure Category: working at heights Steps: [{'description': 'Check equipment and anchor points', 'stepNumber': 1}, {'description': 'Identify potential hazards', 'stepNumber': 2}, {'description': 'Ensure fall protection gear is worn', 'stepNumber': 3}, {'description': 'Follow emergency rescue plan', 'stepNumber': 4}, {'description': 'Maintain three points of contact', 'stepNumber': 5}] |\n",
            "+--------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n",
            "|           0.815926 | Title: Scaffold Safety Procedure Description: Guidelines for scaffold safety procedure Category: working at heights Steps: [{'description': 'Ensure fall protection gear is worn', 'stepNumber': 1}, {'description': 'Check equipment and anchor points', 'stepNumber': 2}, {'description': 'Identify potential hazards', 'stepNumber': 3}]                                                                                                                                         |\n",
            "+--------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n",
            "|           0.804019 | Title: Ladder Safety Procedure Description: Guidelines for ladder safety procedure Category: working at heights Steps: [{'description': 'Ensure fall protection gear is worn', 'stepNumber': 1}, {'description': 'Follow emergency rescue plan', 'stepNumber': 2}, {'description': 'Maintain three points of contact', 'stepNumber': 3}, {'description': 'Check equipment and anchor points', 'stepNumber': 4}]                                                                     |\n",
            "+--------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n",
            "|           0.803579 | Title: Ladder Safety Procedure Description: Guidelines for ladder safety procedure Category: working at heights Steps: [{'description': 'Identify potential hazards', 'stepNumber': 1}, {'description': 'Follow emergency rescue plan', 'stepNumber': 2}, {'description': 'Maintain three points of contact', 'stepNumber': 3}, {'description': 'Check equipment and anchor points', 'stepNumber': 4}, {'description': 'Ensure fall protection gear is worn', 'stepNumber': 5}]     |\n",
            "+--------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n",
            "\n"
          ]
        }
      ],
      "source": [
        "import tabulate\n",
        "\n",
        "table_headers = [\"Similarity Score\", \"Combined Information\"]\n",
        "query = \"Get me a saftey procedure related to helmet incidents\"\n",
        "\n",
        "# Retrieve [score, combined_info] rows and render them as a grid table\n",
        "source_information = get_vector_search_result(query, safety_procedure_collection)\n",
        "table = tabulate.tabulate(source_information, headers=table_headers, tablefmt=\"grid\")\n",
        "\n",
        "# Assemble the text line by line; the trailing empty entry keeps the final newline\n",
        "combined_information = \"\\n\".join(\n",
        "    [\n",
        "        f\"Query: {query}\",\n",
        "        \"\",\n",
        "        \"Continue to answer the query by using the Search Results:\",\n",
        "        \"\",\n",
        "        table,\n",
        "        \"\",\n",
        "    ]\n",
        ")\n",
        "\n",
        "print(combined_information)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 36,
      "metadata": {
        "id": "xAimAJ3LYg9X"
      },
      "outputs": [],
      "source": [
        "%pip install --quiet -U langchain langchain_mongodb langgraph langsmith motor langchain_anthropic # langchain-groq"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 37,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "PKuxHcPtua5j",
        "outputId": "362640d8-5ad2-4c7a-9ee3-e476d116a88d"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Enter your Anthropic API key: ··········\n"
          ]
        }
      ],
      "source": [
        "set_env_securely(\"ANTHROPIC_API_KEY\", \"Enter your Anthropic API key: \")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 38,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "k-jxtpjU48q9",
        "outputId": "935ddb44-f6aa-43f6-fd6b-3c0084ae4b4b"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Enter your Groq API key: ··········\n"
          ]
        }
      ],
      "source": [
        "# Uncomment below to utilize Groq\n",
        "set_env_securely(\"GROQ_API_KEY\", \"Enter your Groq API key: \")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 64,
      "metadata": {
        "id": "ip1cMrUnlAMr"
      },
      "outputs": [],
      "source": [
        "# Programatically create search indexes\n",
        "\n",
        "\n",
        "def create_collection_search_index(collection, index_definition, index_name):\n",
        "    \"\"\"\n",
        "    Create a search index for a MongoDB Atlas collection.\n",
        "\n",
        "    Args:\n",
        "    collection: MongoDB collection object\n",
        "    index_definition: Dictionary defining the index mappings\n",
        "    index_name: String name for the index\n",
        "\n",
        "    Returns:\n",
        "    str: Result of the index creation operation\n",
        "    \"\"\"\n",
        "\n",
        "    try:\n",
        "        search_index_model = SearchIndexModel(\n",
        "            definition=index_definition, name=index_name\n",
        "        )\n",
        "\n",
        "        result = collection.create_search_index(model=search_index_model)\n",
        "        print(f\"Search index '{index_name}' created successfully\")\n",
        "        return result\n",
        "    except Exception as e:\n",
        "        print(f\"Error creating search index: {e!s}\")\n",
        "        return None\n",
        "\n",
        "\n",
        "def print_collection_search_indexes(collection):\n",
        "    \"\"\"\n",
        "    Print all search indexes for a given collection.\n",
        "\n",
        "    Args:\n",
        "    collection: MongoDB collection object\n",
        "    \"\"\"\n",
        "    print(f\"\\nSearch indexes for collection '{collection.name}':\")\n",
        "    for index in collection.list_search_indexes():\n",
        "        print(f\"Index: {index['name']}\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 65,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "YPOiT3sDlefh",
        "outputId": "ed7edc48-7572-4676-c33d-3e2c85d37a92"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Error creating search index: Duplicate Index, full error: {'ok': 0.0, 'errmsg': 'Duplicate Index', 'code': 68, 'codeName': 'IndexAlreadyExists', '$clusterTime': {'clusterTime': Timestamp(1724864038, 1), 'signature': {'hash': b'\\x08\\x19U\\xbb\\xe3Y\\txs\\xad?y\\xd1\"\\x0b]\\xa5\\xb5*\\x13', 'keyId': 7353740577831124994}}, 'operationTime': Timestamp(1724864038, 1)}\n",
            "\n",
            "Search indexes for collection 'safety_procedures':\n",
            "Index: vector_index_with_filter\n",
            "Index: text_search_index\n"
          ]
        }
      ],
      "source": [
        "safety_procedure_collection_text_index_definition = {\n",
        "    \"mappings\": {\n",
        "        \"dynamic\": True,\n",
        "        \"fields\": {\n",
        "            \"title\": {\"type\": \"string\"},\n",
        "            \"description\": {\"type\": \"string\"},\n",
        "            \"category\": {\"type\": \"string\"},\n",
        "            \"steps.description\": {\"type\": \"string\"},\n",
        "        },\n",
        "    }\n",
        "}\n",
        "\n",
        "create_collection_search_index(\n",
        "    safety_procedure_collection,\n",
        "    safety_procedure_collection_text_index_definition,\n",
        "    \"text_search_index\",\n",
        ")\n",
        "\n",
        "# Print all indexes in the collection\n",
        "print_collection_search_indexes(safety_procedure_collection)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 66,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Je5iJ7TPgplJ",
        "outputId": "76fe8986-fc46-4cea-8d20-ceaf51f41daa"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Error creating search index: Duplicate Index, full error: {'ok': 0.0, 'errmsg': 'Duplicate Index', 'code': 68, 'codeName': 'IndexAlreadyExists', '$clusterTime': {'clusterTime': Timestamp(1724864038, 1), 'signature': {'hash': b'\\x08\\x19U\\xbb\\xe3Y\\txs\\xad?y\\xd1\"\\x0b]\\xa5\\xb5*\\x13', 'keyId': 7353740577831124994}}, 'operationTime': Timestamp(1724864038, 1)}\n",
            "\n",
            "Search indexes for collection 'accident_report':\n",
            "Index: vector_index_with_filter\n",
            "Index: text_search_index\n"
          ]
        }
      ],
      "source": [
        "accident_report_collection_text_index_definition = {\n",
        "    \"mappings\": {\n",
        "        \"dynamic\": True,\n",
        "        \"fields\": {\"type\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}},\n",
        "    }\n",
        "}\n",
        "\n",
        "create_collection_search_index(\n",
        "    accident_report_collection,\n",
        "    accident_report_collection_text_index_definition,\n",
        "    \"text_search_index\",\n",
        ")\n",
        "\n",
        "# Print all indexes in the collection\n",
        "print_collection_search_indexes(accident_report_collection)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 67,
      "metadata": {
        "id": "Ayq6AqE_hYO-"
      },
      "outputs": [],
      "source": [
        "from langchain_mongodb import MongoDBAtlasVectorSearch\n",
        "from langchain_mongodb.retrievers import MongoDBAtlasHybridSearchRetriever\n",
        "from langchain_openai import OpenAIEmbeddings\n",
        "\n",
        "ATLAS_VECTOR_SEARCH_INDEX = \"vector_index_with_filter\"\n",
        "embedding_model = OpenAIEmbeddings(\n",
        "    model=OPEN_AI_EMBEDDING_MODEL, dimensions=OPEN_AI_EMBEDDING_MODEL_DIMENSION\n",
        ")\n",
        "\n",
        "# Vector Stores Intialisation\n",
        "vector_store_safety_procedures = MongoDBAtlasVectorSearch.from_connection_string(\n",
        "    connection_string=MONGO_URI,\n",
        "    namespace=DB_NAME + \".\" + SAFETY_PROCEDURES_COLLECTION,\n",
        "    embedding=embedding_model,\n",
        "    index_name=ATLAS_VECTOR_SEARCH_INDEX,\n",
        "    text_key=\"combined_info\",\n",
        ")\n",
        "\n",
        "hybrid_search = MongoDBAtlasHybridSearchRetriever(\n",
        "    vectorstore=vector_store_safety_procedures,\n",
        "    search_index_name=\"text_search_index\",\n",
        "    top_k=5,\n",
        ")\n",
        "\n",
        "hybrid_search_result = hybrid_search.get_relevant_documents(query)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 68,
      "metadata": {
        "id": "O49VEL9ln7IC"
      },
      "outputs": [],
      "source": [
        "def hybrid_search_results_to_table(search_results):\n",
        "    \"\"\"\n",
        "    Convert hybrid search results to a formatted markdown table.\n",
        "\n",
        "    Args:\n",
        "    search_results (list): List of Document objects containing search results\n",
        "\n",
        "    Returns:\n",
        "    str: Formatted markdown table of search results\n",
        "    \"\"\"\n",
        "    # Extract relevant information from each result\n",
        "    data = []\n",
        "    for rank, doc in enumerate(search_results, start=1):\n",
        "        metadata = doc.metadata\n",
        "        data.append(\n",
        "            {\n",
        "                \"Rank\": rank,\n",
        "                \"Procedure ID\": metadata[\"procedureId\"],\n",
        "                \"Title\": metadata[\"title\"],\n",
        "                \"Category\": metadata[\"category\"],\n",
        "                \"Vector Score\": round(metadata[\"vector_score\"], 5),\n",
        "                \"Full-text Score\": round(metadata[\"fulltext_score\"], 5),\n",
        "                \"Total Score\": round(metadata[\"score\"], 5),\n",
        "            }\n",
        "        )\n",
        "\n",
        "    # Create a DataFrame\n",
        "    df = pd.DataFrame(data)\n",
        "\n",
        "    # Generate markdown table\n",
        "    table = tabulate.tabulate(df, headers=\"keys\", tablefmt=\"pipe\", showindex=False)\n",
        "\n",
        "    return table"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 69,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "BnfdHfkdn_3j",
        "outputId": "09921115-53e4-4642-c4c9-15deaf1ad0dd"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "|   Rank | Procedure ID   | Title                           | Category           |   Vector Score |   Full-text Score |   Total Score |\n",
            "|-------:|:---------------|:--------------------------------|:-------------------|---------------:|------------------:|--------------:|\n",
            "|      1 | HEIGHTS-020    | Scaffold Safety Procedure       | working at heights |        0.01587 |           0.01538 |       0.03126 |\n",
            "|      2 | HEIGHTS-050    | Scaffold Safety Procedure       | working at heights |        0.01639 |           0       |       0.01639 |\n",
            "|      3 | CONF-007       | Confined Space Rescue Procedure | confined space     |        0       |           0.01639 |       0.01639 |\n",
            "|      4 | HEIGHTS-044    | Ladder Safety Procedure         | working at heights |        0       |           0.01613 |       0.01613 |\n",
            "|      5 | HEIGHTS-002    | Scaffold Safety Procedure       | working at heights |        0.01613 |           0       |       0.01613 |\n"
          ]
        }
      ],
      "source": [
        "table = hybrid_search_results_to_table(hybrid_search_result)\n",
        "print(table)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 70,
      "metadata": {
        "id": "5kiSt-TTkjzD"
      },
      "outputs": [],
      "source": [
        "from langchain_mongodb.retrievers import MongoDBAtlasFullTextSearchRetriever\n",
        "\n",
        "full_text_search = MongoDBAtlasFullTextSearchRetriever(\n",
        "    collection=safety_procedure_collection,\n",
        "    search_index_name=\"text_search_index\",\n",
        "    search_field=\"description\",\n",
        "    top_k=5,\n",
        ")\n",
        "full_text_search_result = full_text_search.get_relevant_documents(\"Guidelines\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 71,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "xZe38tSJls3-",
        "outputId": "61593365-8ea6-4a4d-bce7-cccec05d4aa1"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "[Document(metadata={'_id': '66cf513f36201a6c2ff0dcf6', 'procedureId': 'HEIGHTS-050', 'title': 'Scaffold Safety Procedure', 'category': 'working at heights', 'steps': [{'stepNumber': 1, 'description': 'Check equipment and anchor points'}, {'stepNumber': 2, 'description': 'Identify potential hazards'}, {'stepNumber': 3, 'description': 'Ensure fall protection gear is worn'}, {'stepNumber': 4, 'description': 'Follow emergency rescue plan'}, {'stepNumber': 5, 'description': 'Maintain three points of contact'}], 'lastUpdated': '2023-10-07T08:53:38.622518', 'combined_info': \"Title: Scaffold Safety Procedure Description: Guidelines for scaffold safety procedure Category: working at heights Steps: [{'stepNumber': 1, 'description': 'Check equipment and anchor points'}, {'stepNumber': 2, 'description': 'Identify potential hazards'}, {'stepNumber': 3, 'description': 'Ensure fall protection gear is worn'}, {'stepNumber': 4, 'description': 'Follow emergency rescue plan'}, {'stepNumber': 5, 'description': 'Maintain three points of contact'}]\", 'embedding': [0.010677263140678406, 0.0832085832953453, 0.1603911966085434, -0.0017732668202370405, -0.06754106283187866, 0.04863458871841431, -0.03703460097312927, -0.03906836360692978, 0.017952365800738335, 0.030104735866189003, -0.01694803684949875, -0.1532604694366455, 0.0070365737192332745, 0.022145435214042664, 0.004249563440680504, 0.001840745098888874, 0.0354025661945343, -0.006691336166113615, 0.003923156764358282, 0.045671820640563965, 0.03753676265478134, 0.04695234075188637, 0.03590473160147667, 0.09741982817649841, -0.09691766649484634, -0.013935052789747715, 0.13548386096954346, 0.017111239954829216, -0.036758407950401306, 0.019283099099993706, 0.07150816172361374, -0.05729692056775093, -0.0936535969376564, 0.11228388547897339, 0.11569859832525253, -0.02455582283437252, -0.048333290964365005, 0.029979195445775986, -0.06744063645601273, -0.04336186498403549, 0.13407780230045319, -0.016922928392887115, 
-0.020839808508753777, -0.01899435557425022, 0.0590042769908905, -0.06387526541948318, 0.04268394410610199, 0.007758434861898422, -0.0170610249042511, 0.07371768355369568, 0.0692986398935318, -0.04624931141734123, 0.05654367431998253, 0.08652286976575851, -0.07190989702939987, 0.006380622275173664, 0.015491761267185211, -0.04850905016064644, -0.012629426084458828, 0.06965015828609467, 0.02509564906358719, -0.08622156828641891, -0.11188215017318726, 0.049965325742959976, 0.010576830245554447, -0.051346275955438614, -0.03783806040883064, 0.009302589111030102, 0.06728998571634293, -0.03334369510412216, 0.08818001300096512, -0.002096534939482808, 0.11660249531269073, -0.07457136362791061, -0.09616442024707794, -0.12242759764194489, -0.07939213514328003, 0.08958607167005539, -0.02581123262643814, -0.04275926947593689, 0.004560277331620455, -0.05019129812717438, -0.05584064498543739, 0.009993065148591995, -0.09194623678922653, -0.019082235172390938, -0.05508739873766899, -0.05343025550246239, -0.13046222925186157, -0.02230863831937313, -0.08948563784360886, 0.013922498561441898, -0.06387526541948318, 0.01534111239016056, 0.07401898503303528, 0.009516009129583836, 0.0570458360016346, 0.06121379882097244, -0.04956359416246414, -0.0063931760378181934, 0.14271502196788788, -0.034624211490154266, -0.07296443730592728, -0.08361031860113144, -0.00300827668979764, -0.0340467244386673, 0.004258979111909866, 0.10856787115335464, -0.029878761619329453, -0.01751297153532505, -0.09144407510757446, 0.0056179603561758995, -0.08883282542228699, 0.14894185960292816, 0.09767091274261475, 0.025497380644083023, 0.09043974429368973, -0.044190436601638794, 0.04100169613957405, 0.015705181285738945, -0.06804323196411133, -0.009302589111030102, 0.013596092350780964, -0.06819388270378113, -0.008888304233551025, -0.019885696470737457, 0.06297137588262558, -0.007118175737559795, 0.016044141724705696, -0.07909084111452103, -0.04617398604750633, -0.0824553370475769, 0.0359298400580883, 
-0.1511513888835907, 0.034774862229824066, 0.007080513052642345, -0.07698175311088562, 0.019772710278630257, -0.06362418830394745, 0.10118605941534042, 0.0229237899184227, 0.0571964867413044, -0.013960161246359348, 0.04770558699965477, -0.06844495981931686, -0.04883545637130737, -0.005373155698180199, 0.12192543596029282, -0.012598040513694286, 0.06703890115022659, -0.05684497207403183, 0.0047328961081802845, 0.020990457385778427, 0.07180946320295334, 0.028347160667181015, -0.16390635073184967, -0.09812285751104355, -0.00011239844025112689, 0.0794423520565033, 0.04363805800676346, -0.03311771899461746, -0.048031993210315704, 0.01595626398921013, 0.02714196778833866, 0.006239388603717089, -0.037059709429740906, -0.057849299162626266, -0.06096271798014641, -0.011568604968488216, 0.0169856995344162, 0.011907565407454967, -0.07241205871105194, 0.0462995283305645, -0.03422247990965843, 0.018328988924622536, -0.020626388490200043, -0.061816394329071045, 0.0947081446647644, -0.006653673946857452, 0.07718261331319809, 0.013169252313673496, 0.00925237312912941, -0.06628565490245819, 0.014186134561896324, -0.03193763270974159, 0.029075298458337784, 0.02009911648929119, 0.012233971618115902, -0.013257130980491638, 0.07000166922807693, -0.02957746386528015, -0.04062507301568985, 0.021982232108712196, -0.05684497207403183, -0.02513331174850464, -0.0285229180008173, -0.04356273263692856, -0.012422283180058002, 0.06924842298030853, 0.02837226912379265, -0.03635667636990547, -0.16571414470672607, 0.07798607647418976, 0.013759295456111431, -0.033594775944948196, 0.07085534930229187, -0.1664171665906906, 0.047228530049324036, 0.037561871111392975, -0.1944379210472107, 0.06407613307237625, 0.035126376897096634, 0.08576962351799011, 0.1183098554611206, -0.0824553370475769, 0.03941987827420235, -0.032188717275857925, -0.0012483485043048859, -0.0035685033071786165, 0.012579210102558136, -0.03414715453982353, 0.03346923366189003, 0.06503024697303772, 0.03444845601916313, 
-0.0032703434117138386, 0.06136444956064224, 0.10058346390724182, -0.022107772529125214, 0.10555488616228104, -0.015906047075986862, -0.08888304233551025, -0.04436619579792023, 0.046123769134283066, 0.05338003858923912, -0.00616720225661993, 0.03598005324602127, 0.046475283801555634, -0.017086131498217583, -0.04499389976263046, -0.035503000020980835, 0.06030990555882454, 0.015529423952102661, -0.041328102350234985, 0.02912551537156105, 0.07808651030063629, -0.04311078414320946, 0.02970300428569317, 0.009641549549996853, -0.015981370583176613, 0.1025921180844307, 0.05935579165816307, -0.059757523238658905, 0.09390468150377274, -0.10947176814079285, -0.049438051879405975, 0.015027259476482868], 'score': 0.004708940163254738}, page_content='Guidelines for scaffold safety procedure'), Document(metadata={'_id': '66cf513f36201a6c2ff0dcf1', 'procedureId': 'CHEM-045', 'title': 'Chemical Storage Safety', 'category': 'chemical handling', 'steps': [{'stepNumber': 1, 'description': 'Dispose of chemicals per regulations'}, {'stepNumber': 2, 'description': 'Review Safety Data Sheets (SDS)'}, {'stepNumber': 3, 'description': 'Label and store chemicals safely'}], 'lastUpdated': '2024-04-11T08:53:38.622442', 'combined_info': \"Title: Chemical Storage Safety Description: Guidelines for chemical storage safety Category: chemical handling Steps: [{'stepNumber': 1, 'description': 'Dispose of chemicals per regulations'}, {'stepNumber': 2, 'description': 'Review Safety Data Sheets (SDS)'}, {'stepNumber': 3, 'description': 'Label and store chemicals safely'}]\", 'embedding': [0.0024450658820569515, 0.06505412608385086, 0.014996403828263283, -0.012117684818804264, -0.005917366594076157, -0.004868602845817804, -0.022254712879657745, -0.02918086014688015, -0.12262850999832153, 0.00693845096975565, 0.020151032134890556, -0.09462866187095642, -0.022402338683605194, -0.050193049013614655, 0.006000406574457884, 4.0654984331922606e-05, -0.019129948690533638, 0.0520629845559597, 
0.06485728919506073, 0.025022711604833603, -0.06116662546992302, 0.06731773912906647, 0.05398213118314743, 0.07957074791193008, -0.08286774158477783, -0.05083276331424713, 0.1479218602180481, -0.07725793123245239, 0.002251305850222707, 0.034569233655929565, 0.14024528861045837, -0.055458396673202515, -0.007873419672250748, 0.02494889684021473, 0.09433340281248093, 0.0018161148764193058, -0.0354795977473259, -0.09005223214626312, -0.014233666472136974, -0.062347639352083206, 0.003260087687522173, 0.037546370178461075, 0.02812287025153637, 0.07588008046150208, 0.10796426236629486, -0.010087818838655949, 0.014713452197611332, 0.04059731960296631, 0.0612158365547657, -0.019474411383271217, 0.0432053878903389, -0.016952456906437874, 0.0376201830804348, -0.0407695509493351, 0.003447696566581726, -0.0769626721739769, 0.0237432811409235, -0.009300476871430874, 0.025391777977347374, 0.0026741945184767246, 0.06013324111700058, -0.09630176424980164, -0.08016125112771988, 0.056344158947467804, 0.005526771303266287, -0.03587326779961586, -0.03855514898896217, 0.06357786059379578, -0.022562267258763313, -0.024026231840252876, 0.0809485912322998, 0.02268528938293457, 0.06584146618843079, -0.008383961394429207, -0.07012264430522919, -0.055458396673202515, -0.11160571873188019, 0.0635286495089531, 0.003955163061618805, -0.08882201462984085, -0.05998561531305313, 0.030534105375409126, -0.06702248007059097, 0.003816763171926141, -0.09846695512533188, 0.010659871622920036, -0.006969206500798464, -0.06835111975669861, -0.07071314752101898, -0.09950033575296402, -0.12518736720085144, -0.03781701624393463, -0.0932508111000061, 0.07770080864429474, -0.012093080207705498, -0.03993299975991249, 0.11032629013061523, 0.007430539466440678, -0.08055492490530014, 0.02072923630475998, -0.017518358305096626, -0.03021424636244774, 0.009614183567464352, 0.05255507305264473, 0.022943636402487755, -0.0509311817586422, -0.07765159755945206, 0.04866757243871689, -0.04241804778575897, 
-0.03464304655790329, -0.026449767872691154, -0.03513513505458832, -0.031001588329672813, 0.1590430736541748, -0.0008642307948321104, 0.008371659554541111, 0.04303315654397011, 0.0008696130244061351, -0.03944091126322746, 0.10392913967370987, -0.10747217386960983, -0.009694147855043411, 0.02177492529153824, -0.1261715441942215, -0.013384813442826271, 0.0015746839344501495, 0.016878642141819, -0.08340903371572495, -0.011754768900573254, -0.045641228556632996, 0.009214361198246479, -0.06131425499916077, 0.049823980778455734, -0.10235445201396942, 0.05152168869972229, -0.003659909823909402, 0.00027506990591064095, -0.054769475013017654, -0.02918086014688015, 0.0838027074933052, 0.04342683032155037, -0.019117645919322968, 0.11564084887504578, 0.10363388061523438, -0.12085698544979095, -0.11445983499288559, -0.06928609311580658, 0.1453630030155182, 0.0246044360101223, 0.010155480355024338, -0.053342416882514954, 0.052997954189777374, 0.058804601430892944, -0.007971837185323238, 0.09708910435438156, -0.12174274772405624, 0.020163334906101227, 0.05314557999372482, 0.1129835695028305, -0.07012264430522919, -0.012056173756718636, 0.014787266030907631, -0.052505865693092346, -0.09147929400205612, 0.032797712832689285, -0.01915455237030983, -0.019978802651166916, -0.07277992367744446, -0.010130876675248146, 0.006360246799886227, -0.039465513080358505, -0.008697668090462685, 0.05762358754873276, -0.07346884161233902, -0.012068475596606731, -0.007590468507260084, -0.034200165420770645, 0.06869558244943619, -0.011453364975750446, 0.08700128644704819, 0.044411007314920425, -0.029968202114105225, -0.1293209195137024, 0.058607764542102814, 0.03725111484527588, -0.010936671867966652, -0.03813687339425087, -0.07804527133703232, -0.051423270255327225, -0.009380441159009933, -0.07115602493286133, 0.010721382685005665, -0.029672948643565178, 0.031370654702186584, -0.04101559519767761, -0.04030206426978111, -0.03193655610084534, -0.03695586323738098, 0.033658869564533234, 
-0.06347944587469101, -0.03766939043998718, -0.04906124621629715, -0.05314557999372482, -0.052653491497039795, -0.14398515224456787, 0.011674804612994194, -0.11534559726715088, -0.03146907314658165, 0.0034415454138070345, -0.10530698299407959, -0.0005105420132167637, 0.03210878744721413, 0.008273241110146046, 0.1659323126077652, -0.10520856827497482, 0.04802785813808441, -0.09221742302179337, -0.030558709055185318, -0.03917026147246361, 0.02093837410211563, -0.023829396814107895, -0.0017269238596782088, 0.1006813496351242, -0.05358846113085747, -0.0921190083026886, 0.007535108365118504, 0.0231773778796196, -0.06810507923364639, 0.11652660369873047, -0.004883980378508568, -0.01666950434446335, 0.012720493599772453, 0.022020969539880753, -0.09777802973985672, -0.027827616780996323, -0.001899154856801033, 0.07086077332496643, -0.0277538038790226, -0.04116322100162506, -0.026474373415112495, 0.061806343495845795, 0.0706639364361763, -0.12912407517433167, -0.039490118622779846, 0.12912407517433167, -0.007633526343852282, 0.012357577681541443, -0.12636838853359222, 0.047560375183820724, 0.09477628767490387, -0.043156180530786514, -0.04271329939365387, 0.14368990063667297, -0.16199560463428497, 0.0027433945797383785, -0.04640396684408188], 'score': 0.004708940163254738}, page_content='Guidelines for chemical storage safety'), Document(metadata={'_id': '66cf513f36201a6c2ff0dcf0', 'procedureId': 'HEIGHTS-044', 'title': 'Ladder Safety Procedure', 'category': 'working at heights', 'steps': [{'stepNumber': 1, 'description': 'Follow emergency rescue plan'}, {'stepNumber': 2, 'description': 'Maintain three points of contact'}, {'stepNumber': 3, 'description': 'Check equipment and anchor points'}], 'lastUpdated': '2024-08-21T08:53:38.622433', 'combined_info': \"Title: Ladder Safety Procedure Description: Guidelines for ladder safety procedure Category: working at heights Steps: [{'stepNumber': 1, 'description': 'Follow emergency rescue plan'}, {'stepNumber': 2, 'description': 
'Maintain three points of contact'}, {'stepNumber': 3, 'description': 'Check equipment and anchor points'}]\", 'embedding': [-0.013004318810999393, 0.030557233840227127, 0.0884319618344307, -0.03755507245659828, -0.018323972821235657, 0.029131747782230377, -0.014630668796598911, 0.03872137889266014, -0.009868250228464603, 0.049140386283397675, -0.07194815576076508, -0.1140388697385788, -0.012894167564809322, -0.035481639206409454, -0.026954641565680504, -0.028146866708993912, 0.02788768708705902, -0.030868249014019966, -0.016393087804317474, 0.026293734088540077, 0.014034556224942207, 0.014825052581727505, 0.10558962821960449, 0.07459178566932678, -0.06785313040018082, -0.029287254437804222, 0.13425485789775848, -0.011144707910716534, -0.036025915294885635, 0.0035993517376482487, 0.14607343077659607, -0.08013822138309479, -0.12191791832447052, 0.13580992817878723, 0.11134340614080429, -0.04139092564582825, -0.020319653674960136, 0.03361554816365242, -0.054842330515384674, -0.047144703567028046, 0.12471705675125122, -0.04206479340791702, -0.05243196338415146, -0.06339524686336517, 0.06977105140686035, -0.0075032394379377365, 0.022989198565483093, 0.034911446273326874, 0.00405291561037302, -0.0005183584871701896, 0.07754643261432648, -0.054272133857011795, 0.05193952098488808, 0.10144275426864624, -0.06147731840610504, -0.07536932826042175, -0.00035596650559455156, -0.06479480862617493, 0.03620734065771103, 0.05670841783285141, 0.03784016892313957, -0.09449675679206848, -0.1398531198501587, 0.04019870236515999, -0.022885527461767197, -0.029753778129816055, -0.04442332312464714, -0.0028509716503322124, 0.03607775270938873, -0.03892872482538223, 0.10325701534748077, -0.001866090577095747, 0.11144707351922989, -0.08806911110877991, -0.04908854886889458, -0.09522245824337006, -0.06759394705295563, 0.05206911265850067, -0.05349459499120712, -0.030946001410484314, -0.02768034301698208, -0.0170539952814579, -0.018103670328855515, -0.012485960498452187, -0.06909719109535217, 
-0.04579697176814079, -0.03882504999637604, -0.05815982446074486, -0.08464794605970383, -0.06686824560165405, -0.07801295071840286, 0.018038876354694366, -0.01621166244149208, 0.023157665506005287, 0.04190928488969803, 0.015058314427733421, 0.08371489495038986, 0.05155075341463089, -0.06235852837562561, -0.011436284519731998, 0.1226436197757721, -0.03594816103577614, -0.016393087804317474, -0.08402591198682785, -0.032604750245809555, -0.049140386283397675, 0.014475161209702492, 0.08158962428569794, 0.029987039044499397, 0.024323971942067146, -0.1053822860121727, 0.021758098155260086, -0.07070410251617432, 0.1536414623260498, 0.1026349812746048, -0.02274297922849655, 0.10419005900621414, -0.047688983380794525, 0.011643627658486366, -0.002470302162691951, -0.046833690255880356, -0.03449675813317299, 0.04190928488969803, -0.15467818081378937, -0.03480777144432068, -0.011377968825399876, 0.03817710280418396, -0.047999996691942215, 0.03636284917593002, -0.06997839361429214, -0.02104535512626171, 0.00838444847613573, 0.039732180535793304, -0.1253390908241272, -0.026902806013822556, 0.020578831434249878, -0.0199179258197546, 0.037477318197488785, -0.028380127623677254, 0.09853994846343994, 0.06334340572357178, 0.03102375566959381, -0.042583148926496506, 0.06101079657673836, -0.04009503126144409, -0.047974079847335815, -0.059663064777851105, 0.11528293043375015, 0.010282936505973339, 0.015187904238700867, -0.07251835614442825, 0.020125268027186394, -0.04670410230755806, 0.05530885234475136, 0.009051835164427757, -0.12098487466573715, -0.058574508875608444, -0.009622029960155487, 0.04154643416404724, 0.009842331521213055, 0.0026079912204295397, -0.07749459892511368, 0.023416845127940178, 0.01425485871732235, 0.03447084128856659, -0.04387904703617096, -0.014695463702082634, -0.030660904943943024, -0.012349891476333141, 0.028328292071819305, -0.005617710296064615, -0.08412958681583405, 0.0512397363781929, -0.06298055499792099, 0.039110150188207626, -0.08200431615114212, 
-0.025749458000063896, 0.15198270976543427, -0.029520517215132713, 0.043153345584869385, 0.02703239582479, -0.01819438301026821, -0.0741771012544632, 0.024686822667717934, -0.05432397127151489, 0.03879913315176964, 0.05504967272281647, 0.03449675813317299, 0.002368250396102667, 0.00939524732530117, 0.0017316413577646017, 0.028561552986502647, 0.0342116616666317, -0.04253131523728371, -0.03278617560863495, -0.0313866063952446, -0.06329157203435898, 0.003758099162951112, 0.07816845923662186, 0.014591791667044163, -0.051965437829494476, -0.10864794254302979, 0.10139092057943344, 0.02019006386399269, -0.06308422982692719, 0.07205183058977127, -0.1736501008272171, 0.03809934854507446, 0.007328293286263943, -0.20050106942653656, 0.0270583126693964, 0.018686823546886444, 0.07179265469312668, 0.08853562921285629, -0.0712224543094635, -0.0019616628997027874, -0.035507556051015854, -0.05525701493024826, 0.013697623275220394, 0.053442761301994324, -0.04649675637483597, -0.014773217029869556, 0.023546434938907623, 0.08713605999946594, -0.01266738586127758, 0.047144703567028046, 0.10367169976234436, -0.07904966920614243, 0.11238012462854385, 0.03563714772462845, -0.12399135529994965, -0.016146866604685783, 0.04390496388077736, 0.05209502950310707, 0.059663064777851105, 0.013568033464252949, 0.01882937178015709, -0.05810798704624176, 0.03962850570678711, -0.026591790840029716, 0.08013822138309479, 0.027550753206014633, -0.08972785621881485, 0.039447080343961716, 0.10019870102405548, -0.06012958660721779, 0.045589629560709, 0.0022629587911069393, -0.0077170622535049915, 0.12720517814159393, 0.044578831642866135, -0.04932181164622307, 0.0912310928106308, -0.16193519532680511, -0.023300213739275932, 0.06987472623586655], 'score': 0.004708940163254738}, page_content='Guidelines for ladder safety procedure'), Document(metadata={'_id': '66cf513f36201a6c2ff0dceb', 'procedureId': 'CHEM-039', 'title': 'Chemical Handling Procedure', 'category': 'chemical handling', 'steps': 
[{'stepNumber': 1, 'description': 'Dispose of chemicals per regulations'}, {'stepNumber': 2, 'description': 'Review Safety Data Sheets (SDS)'}, {'stepNumber': 3, 'description': 'Wear appropriate chemical-resistant PPE'}, {'stepNumber': 4, 'description': 'Label and store chemicals safely'}], 'lastUpdated': '2024-03-25T08:53:38.622386', 'combined_info': \"Title: Chemical Handling Procedure Description: Guidelines for chemical handling procedure Category: chemical handling Steps: [{'stepNumber': 1, 'description': 'Dispose of chemicals per regulations'}, {'stepNumber': 2, 'description': 'Review Safety Data Sheets (SDS)'}, {'stepNumber': 3, 'description': 'Wear appropriate chemical-resistant PPE'}, {'stepNumber': 4, 'description': 'Label and store chemicals safely'}]\", 'embedding': [-0.009836207143962383, 0.05434979870915413, 0.06914732605218887, 0.022956913337111473, -0.038123227655887604, -0.001478024059906602, 0.0067533887922763824, 0.0053675612434744835, -0.04651310294866562, 0.02629903331398964, 0.03839981555938721, -0.11736606061458588, -0.02371753379702568, -0.0239019263535738, -0.007358428090810776, -0.046351756900548935, 0.010683261789381504, 0.10814642161130905, 0.025953296571969986, 0.06651972979307175, -0.026598671451210976, 0.11146549135446548, 0.015085642226040363, 0.096437469124794, -0.07112955302000046, -0.036578934639692307, 0.09104397892951965, -0.021401097998023033, -0.029825547710061073, 0.001577423419803381, 0.14926603436470032, -0.0649062916636467, -0.03563392162322998, 0.004601178225129843, 0.08154774457216263, -0.04628261178731918, -0.045706383883953094, -0.009277266450226307, -0.039805810898542404, -0.012976648285984993, 0.0009450134821236134, -0.023855827748775482, 0.013725744560360909, 0.011415070854127407, 0.001669619814492762, 0.021147556602954865, -0.035726118832826614, 0.045061007142066956, 0.0892922505736351, -0.041234854608774185, 0.07679963111877441, 0.020548280328512192, 0.03471195697784424, 0.01923448219895363, 0.019280578941106796, 
-0.06006597727537155, 0.03427402675151825, -0.028926631435751915, 0.005338749848306179, -0.017079390585422516, 0.02280709333717823, -0.08145555108785629, -0.06914732605218887, 0.026252934709191322, -0.04365501180291176, -0.05693129822611809, -0.04517625272274017, 0.048218734562397, 0.03775443881750107, -0.0651828795671463, 0.0895688384771347, 0.0018237608019262552, 0.025745855644345284, -0.03395133838057518, -0.06605874747037888, -0.05716178938746452, -0.13405361771583557, 0.002515234053134918, 0.0010825878707692027, -0.06329285353422165, -0.0479421466588974, -0.006136825308203697, -0.03895299509167671, -0.0006507771904580295, -0.09477793425321579, -7.360408926615492e-05, 0.009386749938130379, -0.06135672703385353, -0.09772822260856628, -0.11644409596920013, -0.12400420755147934, -0.024501202628016472, -0.07011538743972778, 0.07463301718235016, 0.008268867619335651, -0.032038260251283646, 0.050247058272361755, 0.036048807203769684, -0.05084633454680443, 0.02669086866080761, 0.06435311585664749, -0.048034343868494034, -0.009674863889813423, 0.0580376572906971, 0.03558782488107681, -0.037224311381578445, -0.08680294454097748, 0.10427416861057281, 0.005779563914984465, 0.0017776625463739038, -0.036578934639692307, -0.02844260074198246, -0.03369779884815216, 0.1696414351463318, 0.04761945828795433, 0.023279599845409393, 0.09173545241355896, -0.017252258956432343, -0.044415634125471115, 0.05052364617586136, -0.12400420755147934, 0.011345923878252506, 0.014059956185519695, -0.15922324359416962, -0.0822853222489357, -0.01711396314203739, 0.004655919969081879, -0.12372761964797974, -0.014601610600948334, -0.0817321389913559, 0.017920682206749916, -0.07062246650457382, 0.00897186528891325, -0.1276920586824417, 0.028327355161309242, 0.04950948432087898, 0.02526182308793068, 0.020571330562233925, 0.009133209474384785, 0.03568002209067345, 0.08293069154024124, -0.026414278894662857, 0.07062246650457382, 0.0956077054142952, -0.09450134634971619, -0.08657245337963104, 
-0.12050074338912964, 0.10206145793199539, 0.019476497545838356, 0.039552271366119385, -0.10805422067642212, 0.10482734441757202, 0.022899290546774864, -0.007923130877315998, 0.09284181147813797, -0.13967759907245636, 0.017010241746902466, 0.0688246414065361, 0.10335220396518707, -0.0988345816731453, 0.03556477651000023, -0.03213045746088028, -0.032683636993169785, -0.06260138005018234, 0.07753720134496689, -0.02526182308793068, 0.02247288078069687, -0.018450811505317688, -0.007807885762304068, 0.031853869557380676, -0.0712217465043068, 0.007877033203840256, 0.035864412784576416, -0.04729676991701126, -0.03888384625315666, -0.027059653773903847, 0.01139778457581997, 0.08279240131378174, -0.0445769764482975, 0.09809701144695282, 0.08518950641155243, 0.05485687777400017, -0.07795208692550659, -0.020502181723713875, -0.012123831547796726, 0.019522596150636673, 0.03533428534865379, -0.04734287038445473, -0.05006266385316849, -0.004915222525596619, -0.0021003501024097204, 0.032937176525592804, -0.014682282693684101, 0.007225895766168833, -0.13589754700660706, 0.009968739934265614, 0.04923289641737938, -0.01736750453710556, 0.009761298075318336, -0.04420819133520126, -0.05052364617586136, -0.07343446463346481, -0.052044887095689774, 0.007145223673433065, -0.07974991947412491, -0.024385957047343254, -0.12723107635974884, -0.04307878389954567, 0.029687251895666122, -0.14797528088092804, -0.009726723656058311, 0.015719491988420486, 0.047388967126607895, 0.15913105010986328, -0.14474840462207794, 0.01588083617389202, -0.06601265072822571, 0.019453447312116623, -0.02486998774111271, 0.015039543621242046, -0.03899909183382988, -0.010625638999044895, 0.11035913228988647, 0.00031188325374387205, -0.0855121910572052, 0.03201521188020706, 0.014025382697582245, -0.041234854608774185, 0.06444530934095383, -0.06914732605218887, -0.0511690229177475, 0.01788610778748989, 0.03183082118630409, -0.02132042497396469, -0.026898309588432312, 0.007369952742010355, 0.0514456108212471, 
-0.03946007415652275, -0.017782388255000114, -0.06131063029170036, 0.06647363305091858, 0.0827002003788948, -0.10547272115945816, -0.03803103044629097, 0.13986198604106903, -0.034504517912864685, 0.03886079788208008, -0.10086289793252945, 0.092242531478405, 0.07564717531204224, -0.029894694685935974, -0.060665253549814224, 0.12640132009983063, -0.19730037450790405, -0.015984557569026947, -0.01949954591691494], 'score': 0.004708940163254738}, page_content='Guidelines for chemical handling procedure'), Document(metadata={'_id': '66cf513f36201a6c2ff0dce8', 'procedureId': 'CHEM-036', 'title': 'Chemical Handling Procedure', 'category': 'chemical handling', 'steps': [{'stepNumber': 1, 'description': 'Dispose of chemicals per regulations'}, {'stepNumber': 2, 'description': 'Review Safety Data Sheets (SDS)'}, {'stepNumber': 3, 'description': 'Wear appropriate chemical-resistant PPE'}, {'stepNumber': 4, 'description': 'Label and store chemicals safely'}], 'lastUpdated': '2024-08-19T08:53:38.622356', 'combined_info': \"Title: Chemical Handling Procedure Description: Guidelines for chemical handling procedure Category: chemical handling Steps: [{'stepNumber': 1, 'description': 'Dispose of chemicals per regulations'}, {'stepNumber': 2, 'description': 'Review Safety Data Sheets (SDS)'}, {'stepNumber': 3, 'description': 'Wear appropriate chemical-resistant PPE'}, {'stepNumber': 4, 'description': 'Label and store chemicals safely'}]\", 'embedding': [-0.009836207143962383, 0.05434979870915413, 0.06914732605218887, 0.022956913337111473, -0.038123227655887604, -0.001478024059906602, 0.0067533887922763824, 0.0053675612434744835, -0.04651310294866562, 0.02629903331398964, 0.03839981555938721, -0.11736606061458588, -0.02371753379702568, -0.0239019263535738, -0.007358428090810776, -0.046351756900548935, 0.010683261789381504, 0.10814642161130905, 0.025953296571969986, 0.06651972979307175, -0.026598671451210976, 0.11146549135446548, 0.015085642226040363, 0.096437469124794, 
-0.07112955302000046, -0.036578934639692307, 0.09104397892951965, -0.021401097998023033, -0.029825547710061073, 0.001577423419803381, 0.14926603436470032, -0.0649062916636467, -0.03563392162322998, 0.004601178225129843, 0.08154774457216263, -0.04628261178731918, -0.045706383883953094, -0.009277266450226307, -0.039805810898542404, -0.012976648285984993, 0.0009450134821236134, -0.023855827748775482, 0.013725744560360909, 0.011415070854127407, 0.001669619814492762, 0.021147556602954865, -0.035726118832826614, 0.045061007142066956, 0.0892922505736351, -0.041234854608774185, 0.07679963111877441, 0.020548280328512192, 0.03471195697784424, 0.01923448219895363, 0.019280578941106796, -0.06006597727537155, 0.03427402675151825, -0.028926631435751915, 0.005338749848306179, -0.017079390585422516, 0.02280709333717823, -0.08145555108785629, -0.06914732605218887, 0.026252934709191322, -0.04365501180291176, -0.05693129822611809, -0.04517625272274017, 0.048218734562397, 0.03775443881750107, -0.0651828795671463, 0.0895688384771347, 0.0018237608019262552, 0.025745855644345284, -0.03395133838057518, -0.06605874747037888, -0.05716178938746452, -0.13405361771583557, 0.002515234053134918, 0.0010825878707692027, -0.06329285353422165, -0.0479421466588974, -0.006136825308203697, -0.03895299509167671, -0.0006507771904580295, -0.09477793425321579, -7.360408926615492e-05, 0.009386749938130379, -0.06135672703385353, -0.09772822260856628, -0.11644409596920013, -0.12400420755147934, -0.024501202628016472, -0.07011538743972778, 0.07463301718235016, 0.008268867619335651, -0.032038260251283646, 0.050247058272361755, 0.036048807203769684, -0.05084633454680443, 0.02669086866080761, 0.06435311585664749, -0.048034343868494034, -0.009674863889813423, 0.0580376572906971, 0.03558782488107681, -0.037224311381578445, -0.08680294454097748, 0.10427416861057281, 0.005779563914984465, 0.0017776625463739038, -0.036578934639692307, -0.02844260074198246, -0.03369779884815216, 0.1696414351463318, 0.04761945828795433, 
0.023279599845409393, 0.09173545241355896, -0.017252258956432343, -0.044415634125471115, 0.05052364617586136, -0.12400420755147934, 0.011345923878252506, 0.014059956185519695, -0.15922324359416962, -0.0822853222489357, -0.01711396314203739, 0.004655919969081879, -0.12372761964797974, -0.014601610600948334, -0.0817321389913559, 0.017920682206749916, -0.07062246650457382, 0.00897186528891325, -0.1276920586824417, 0.028327355161309242, 0.04950948432087898, 0.02526182308793068, 0.020571330562233925, 0.009133209474384785, 0.03568002209067345, 0.08293069154024124, -0.026414278894662857, 0.07062246650457382, 0.0956077054142952, -0.09450134634971619, -0.08657245337963104, -0.12050074338912964, 0.10206145793199539, 0.019476497545838356, 0.039552271366119385, -0.10805422067642212, 0.10482734441757202, 0.022899290546774864, -0.007923130877315998, 0.09284181147813797, -0.13967759907245636, 0.017010241746902466, 0.0688246414065361, 0.10335220396518707, -0.0988345816731453, 0.03556477651000023, -0.03213045746088028, -0.032683636993169785, -0.06260138005018234, 0.07753720134496689, -0.02526182308793068, 0.02247288078069687, -0.018450811505317688, -0.007807885762304068, 0.031853869557380676, -0.0712217465043068, 0.007877033203840256, 0.035864412784576416, -0.04729676991701126, -0.03888384625315666, -0.027059653773903847, 0.01139778457581997, 0.08279240131378174, -0.0445769764482975, 0.09809701144695282, 0.08518950641155243, 0.05485687777400017, -0.07795208692550659, -0.020502181723713875, -0.012123831547796726, 0.019522596150636673, 0.03533428534865379, -0.04734287038445473, -0.05006266385316849, -0.004915222525596619, -0.0021003501024097204, 0.032937176525592804, -0.014682282693684101, 0.007225895766168833, -0.13589754700660706, 0.009968739934265614, 0.04923289641737938, -0.01736750453710556, 0.009761298075318336, -0.04420819133520126, -0.05052364617586136, -0.07343446463346481, -0.052044887095689774, 0.007145223673433065, -0.07974991947412491, -0.024385957047343254, 
-0.12723107635974884, -0.04307878389954567, 0.029687251895666122, -0.14797528088092804, -0.009726723656058311, 0.015719491988420486, 0.047388967126607895, 0.15913105010986328, -0.14474840462207794, 0.01588083617389202, -0.06601265072822571, 0.019453447312116623, -0.02486998774111271, 0.015039543621242046, -0.03899909183382988, -0.010625638999044895, 0.11035913228988647, 0.00031188325374387205, -0.0855121910572052, 0.03201521188020706, 0.014025382697582245, -0.041234854608774185, 0.06444530934095383, -0.06914732605218887, -0.0511690229177475, 0.01788610778748989, 0.03183082118630409, -0.02132042497396469, -0.026898309588432312, 0.007369952742010355, 0.0514456108212471, -0.03946007415652275, -0.017782388255000114, -0.06131063029170036, 0.06647363305091858, 0.0827002003788948, -0.10547272115945816, -0.03803103044629097, 0.13986198604106903, -0.034504517912864685, 0.03886079788208008, -0.10086289793252945, 0.092242531478405, 0.07564717531204224, -0.029894694685935974, -0.060665253549814224, 0.12640132009983063, -0.19730037450790405, -0.015984557569026947, -0.01949954591691494], 'score': 0.004708940163254738}, page_content='Guidelines for chemical handling procedure')]\n"
          ]
        }
      ],
      "source": [
        "# Print a compact one-line summary per hit. Printing the Document list directly\n",
        "# also dumps every stored embedding vector, which floods the cell output.\n",
        "for doc in full_text_search_result:\n",
        "    print(\n",
        "        doc.metadata.get(\"procedureId\"),\n",
        "        doc.metadata.get(\"title\"),\n",
        "        doc.page_content,\n",
        "        sep=\" | \",\n",
        "    )"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "jbg6qsphi0RC"
      },
      "source": [
        "## MongoDB Checkpointer\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 72,
      "metadata": {
        "id": "F_q3Fr89iyqd"
      },
      "outputs": [],
      "source": [
        "import pickle\n",
        "from collections.abc import AsyncIterator\n",
        "from contextlib import AbstractContextManager\n",
        "from datetime import datetime, timezone\n",
        "from types import TracebackType\n",
        "from typing import Any, Dict, List, Optional, Tuple, Union\n",
        "\n",
        "from langchain_core.runnables import RunnableConfig\n",
        "from langgraph.checkpoint.base import (\n",
        "    BaseCheckpointSaver,\n",
        "    Checkpoint,\n",
        "    CheckpointMetadata,\n",
        "    CheckpointTuple,\n",
        "    SerializerProtocol,\n",
        ")\n",
        "from langgraph.checkpoint.serde.jsonplus import JsonPlusSerializer\n",
        "from motor.motor_asyncio import AsyncIOMotorClient\n",
        "from typing_extensions import Self\n",
        "\n",
        "\n",
        "class JsonPlusSerializerCompat(JsonPlusSerializer):\n",
        "    \"\"\"JsonPlusSerializer that can additionally decode pickled payloads.\"\"\"\n",
        "\n",
        "    def loads(self, data: bytes) -> Any:\n",
        "        \"\"\"Deserialize ``data``, falling back to pickle for pickle-shaped bytes.\n",
        "\n",
        "        Args:\n",
        "            data: Serialized payload.\n",
        "\n",
        "        Returns:\n",
        "            The decoded Python object.\n",
        "        \"\"\"\n",
        "        # Pickle streams (protocol 2+) start with the 0x80 PROTO opcode and end\n",
        "        # with the \".\" STOP opcode; anything else goes to the JSON-plus decoder.\n",
        "        looks_pickled = data.startswith(b\"\\x80\") and data.endswith(b\".\")\n",
        "        if not looks_pickled:\n",
        "            return super().loads(data)\n",
        "        # SECURITY: pickle.loads can execute arbitrary code. Only use this saver\n",
        "        # against a collection whose contents you fully trust.\n",
        "        return pickle.loads(data)\n",
        "\n",
        "\n",
        "class MongoDBSaver(AbstractContextManager, BaseCheckpointSaver):\n",
        "    \"\"\"Asynchronous LangGraph checkpoint saver backed by a MongoDB collection.\n",
        "\n",
        "    Checkpoint documents hold ``thread_id``, ``thread_ts``, the serialized\n",
        "    ``checkpoint`` and ``metadata``, and an optional ``parent_ts``. Pending\n",
        "    writes stored by ``aput_writes`` go to the same collection but carry no\n",
        "    ``checkpoint`` field, so every read path filters on that field.\n",
        "\n",
        "    Only the async API is implemented; the sync methods raise\n",
        "    ``NotImplementedError``.\n",
        "    \"\"\"\n",
        "\n",
        "    serde = JsonPlusSerializerCompat()\n",
        "\n",
        "    client: AsyncIOMotorClient\n",
        "    db_name: str\n",
        "    collection_name: str\n",
        "\n",
        "    def __init__(\n",
        "        self,\n",
        "        client: AsyncIOMotorClient,\n",
        "        db_name: str,\n",
        "        collection_name: str,\n",
        "        *,\n",
        "        serde: Optional[SerializerProtocol] = None,\n",
        "    ) -> None:\n",
        "        \"\"\"Bind the saver to ``client[db_name][collection_name]``.\n",
        "\n",
        "        Args:\n",
        "            client: Motor client used for all database access.\n",
        "            db_name: Name of the database holding the checkpoints.\n",
        "            collection_name: Name of the checkpoint collection.\n",
        "            serde: Optional serializer forwarded to the base class initializer.\n",
        "        \"\"\"\n",
        "        super().__init__(serde=serde)\n",
        "        self.client = client\n",
        "        self.db_name = db_name\n",
        "        self.collection_name = collection_name\n",
        "        self.collection = client[db_name][collection_name]\n",
        "\n",
        "    def __enter__(self) -> Self:\n",
        "        \"\"\"Return the saver itself; nothing is acquired on entry.\"\"\"\n",
        "        return self\n",
        "\n",
        "    def __exit__(\n",
        "        self,\n",
        "        __exc_type: Optional[type[BaseException]],\n",
        "        __exc_value: Optional[BaseException],\n",
        "        __traceback: Optional[TracebackType],\n",
        "    ) -> Optional[bool]:\n",
        "        \"\"\"Leave the ``with`` block without suppressing exceptions.\"\"\"\n",
        "        # A truthy return value would silently swallow every error raised inside\n",
        "        # the block, so return None and let exceptions propagate.\n",
        "        return None\n",
        "\n",
        "    @staticmethod\n",
        "    def _parent_config(doc: Dict[str, Any]) -> Optional[RunnableConfig]:\n",
        "        \"\"\"Build the config pointing at the parent checkpoint of ``doc``, if any.\"\"\"\n",
        "        if not doc.get(\"parent_ts\"):\n",
        "            return None\n",
        "        return {\n",
        "            \"configurable\": {\n",
        "                \"thread_id\": doc[\"thread_id\"],\n",
        "                \"thread_ts\": doc[\"parent_ts\"],\n",
        "            }\n",
        "        }\n",
        "\n",
        "    async def aget_tuple(self, config: RunnableConfig) -> Optional[CheckpointTuple]:\n",
        "        \"\"\"Fetch one checkpoint for the thread named in ``config``.\n",
        "\n",
        "        When ``config`` carries a ``thread_ts`` that exact checkpoint is looked\n",
        "        up; otherwise the one with the highest ``thread_ts`` is returned.\n",
        "\n",
        "        Args:\n",
        "            config: Runnable config with ``configurable.thread_id`` and an\n",
        "                optional ``configurable.thread_ts``.\n",
        "\n",
        "        Returns:\n",
        "            The matching ``CheckpointTuple``, or ``None`` when nothing matches.\n",
        "        \"\"\"\n",
        "        configurable = config[\"configurable\"]\n",
        "        # Pending-write documents share this collection but have no \"checkpoint\"\n",
        "        # field; exclude them so they are never decoded as checkpoints.\n",
        "        query: Dict[str, Any] = {\n",
        "            \"thread_id\": configurable[\"thread_id\"],\n",
        "            \"checkpoint\": {\"$exists\": True},\n",
        "        }\n",
        "        if configurable.get(\"thread_ts\"):\n",
        "            query[\"thread_ts\"] = configurable[\"thread_ts\"]\n",
        "\n",
        "        doc = await self.collection.find_one(query, sort=[(\"thread_ts\", -1)])\n",
        "        if not doc:\n",
        "            return None\n",
        "        return CheckpointTuple(\n",
        "            config,\n",
        "            self.serde.loads(doc[\"checkpoint\"]),\n",
        "            self.serde.loads(doc[\"metadata\"]),\n",
        "            self._parent_config(doc),\n",
        "        )\n",
        "\n",
        "    async def alist(\n",
        "        self,\n",
        "        config: Optional[RunnableConfig],\n",
        "        *,\n",
        "        filter: Optional[Dict[str, Any]] = None,\n",
        "        before: Optional[RunnableConfig] = None,\n",
        "        limit: Optional[int] = None,\n",
        "    ) -> AsyncIterator[CheckpointTuple]:\n",
        "        \"\"\"Yield checkpoints from newest to oldest ``thread_ts``.\n",
        "\n",
        "        Args:\n",
        "            config: If given, restricts results to ``configurable.thread_id``.\n",
        "            filter: Metadata key/value pairs every yielded checkpoint must match.\n",
        "            before: If given, only checkpoints with a ``thread_ts`` lower than\n",
        "                ``before``'s are considered.\n",
        "            limit: Maximum number of checkpoints to yield; falsy means no limit.\n",
        "\n",
        "        Yields:\n",
        "            One ``CheckpointTuple`` per matching checkpoint document.\n",
        "        \"\"\"\n",
        "        # Skip pending-write documents, which have no \"thread_ts\"/\"checkpoint\".\n",
        "        query: Dict[str, Any] = {\"checkpoint\": {\"$exists\": True}}\n",
        "        if config is not None:\n",
        "            query[\"thread_id\"] = config[\"configurable\"][\"thread_id\"]\n",
        "        if before is not None:\n",
        "            query[\"thread_ts\"] = {\"$lt\": before[\"configurable\"][\"thread_ts\"]}\n",
        "\n",
        "        cursor = self.collection.find(query).sort(\"thread_ts\", -1)\n",
        "        # The limit can only be pushed to the server when no metadata filter is\n",
        "        # applied, because filtering happens client-side below.\n",
        "        if limit and not filter:\n",
        "            cursor = cursor.limit(limit)\n",
        "\n",
        "        yielded = 0\n",
        "        async for doc in cursor:\n",
        "            # Metadata is stored in serialized form (see aput), so a server-side\n",
        "            # \"metadata.<key>\" query cannot match; decode first, then compare.\n",
        "            # Assumes the decoded metadata is a mapping.\n",
        "            metadata = self.serde.loads(doc[\"metadata\"])\n",
        "            if filter and any(\n",
        "                metadata.get(key) != value for key, value in filter.items()\n",
        "            ):\n",
        "                continue\n",
        "            yield CheckpointTuple(\n",
        "                {\n",
        "                    \"configurable\": {\n",
        "                        \"thread_id\": doc[\"thread_id\"],\n",
        "                        \"thread_ts\": doc[\"thread_ts\"],\n",
        "                    }\n",
        "                },\n",
        "                self.serde.loads(doc[\"checkpoint\"]),\n",
        "                metadata,\n",
        "                self._parent_config(doc),\n",
        "            )\n",
        "            yielded += 1\n",
        "            if limit and yielded >= limit:\n",
        "                break\n",
        "\n",
        "    async def aput(\n",
        "        self,\n",
        "        config: RunnableConfig,\n",
        "        checkpoint: Checkpoint,\n",
        "        metadata: CheckpointMetadata,\n",
        "        new_versions: Optional[dict[str, Union[str, float, int]]],\n",
        "    ) -> RunnableConfig:\n",
        "        \"\"\"Insert ``checkpoint`` as a new document for the thread in ``config``.\n",
        "\n",
        "        Args:\n",
        "            config: Runnable config; ``configurable.thread_ts``, when present,\n",
        "                is recorded as the new checkpoint's ``parent_ts``.\n",
        "            checkpoint: Checkpoint to store; its ``id`` becomes ``thread_ts``.\n",
        "            metadata: Metadata stored alongside the checkpoint.\n",
        "            new_versions: Accepted for interface compatibility; not used here.\n",
        "\n",
        "        Returns:\n",
        "            A config addressing the checkpoint that was just written.\n",
        "        \"\"\"\n",
        "        configurable = config[\"configurable\"]\n",
        "        doc = {\n",
        "            \"thread_id\": configurable[\"thread_id\"],\n",
        "            \"thread_ts\": checkpoint[\"id\"],\n",
        "            \"checkpoint\": self.serde.dumps(checkpoint),\n",
        "            \"metadata\": self.serde.dumps(metadata),\n",
        "        }\n",
        "        if configurable.get(\"thread_ts\"):\n",
        "            doc[\"parent_ts\"] = configurable[\"thread_ts\"]\n",
        "        await self.collection.insert_one(doc)\n",
        "        return {\n",
        "            \"configurable\": {\n",
        "                \"thread_id\": configurable[\"thread_id\"],\n",
        "                \"thread_ts\": checkpoint[\"id\"],\n",
        "            }\n",
        "        }\n",
        "\n",
        "    # The synchronous API is intentionally unsupported: the saver wraps an async\n",
        "    # Motor client, so callers must use the a*-prefixed coroutines instead.\n",
        "    def get_tuple(self, config: RunnableConfig) -> Optional[CheckpointTuple]:\n",
        "        \"\"\"Unsupported; always raises ``NotImplementedError``.\"\"\"\n",
        "        raise NotImplementedError(\"Use aget_tuple for asynchronous operations\")\n",
        "\n",
        "    def list(\n",
        "        self,\n",
        "        config: Optional[RunnableConfig],\n",
        "        *,\n",
        "        filter: Optional[Dict[str, Any]] = None,\n",
        "        before: Optional[RunnableConfig] = None,\n",
        "        limit: Optional[int] = None,\n",
        "    ):\n",
        "        \"\"\"Unsupported; always raises ``NotImplementedError``.\"\"\"\n",
        "        raise NotImplementedError(\"Use alist for asynchronous operations\")\n",
        "\n",
        "    def put(\n",
        "        self,\n",
        "        config: RunnableConfig,\n",
        "        checkpoint: Checkpoint,\n",
        "        metadata: CheckpointMetadata,\n",
        "        new_versions: Optional[dict[str, Union[str, float, int]]] = None,\n",
        "    ) -> RunnableConfig:\n",
        "        \"\"\"Unsupported; always raises ``NotImplementedError``.\n",
        "\n",
        "        ``new_versions`` mirrors ``aput`` so both call shapes reach the same error.\n",
        "        \"\"\"\n",
        "        raise NotImplementedError(\"Use aput for asynchronous operations\")\n",
        "\n",
        "    async def aput_writes(\n",
        "        self,\n",
        "        config: RunnableConfig,\n",
        "        writes: List[Tuple[str, Any]],\n",
        "        task_id: str,\n",
        "    ) -> None:\n",
        "        \"\"\"Asynchronously store intermediate writes linked to a checkpoint.\n",
        "\n",
        "        Args:\n",
        "            config: Runnable config providing ``configurable.thread_id``.\n",
        "            writes: ``(channel, value)`` pairs to persist.\n",
        "            task_id: Identifier of the task that produced the writes.\n",
        "        \"\"\"\n",
        "        thread_id = config[\"configurable\"][\"thread_id\"]\n",
        "        docs = [\n",
        "            {\n",
        "                \"thread_id\": thread_id,\n",
        "                \"task_id\": task_id,\n",
        "                \"channel\": channel,\n",
        "                \"value\": self.serde.dumps(value),\n",
        "                \"timestamp\": datetime.now(timezone.utc).isoformat(),\n",
        "            }\n",
        "            for channel, value in writes\n",
        "        ]\n",
        "\n",
        "        # insert_many rejects an empty batch, so skip the call when there is nothing to write.\n",
        "        if docs:\n",
        "            await self.collection.insert_many(docs)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "N-XJmokEi9OQ"
      },
      "source": [
        "## Tool Definitions"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 73,
      "metadata": {
        "id": "IKxfqqv4i8np"
      },
      "outputs": [],
      "source": [
        "from typing import Any, Dict\n",
        "\n",
        "from langchain.agents import tool\n",
        "\n",
        "\n",
        "@tool\n",
        "def safety_procedures_vector_search_tool(query: str, k: int = 5):\n",
        "    \"\"\"\n",
        "    Perform a vector similarity search on safety procedures.\n",
        "\n",
        "    Args:\n",
        "        query (str): The search query string.\n",
        "        k (int, optional): Number of top results to return. Defaults to 5.\n",
        "\n",
        "    Returns:\n",
        "        list: List of tuples (Document, score), where Document is a safety procedure\n",
        "              and score is the Atlas Vector Search relevance score\n",
        "              (higher is more similar).\n",
        "\n",
        "    Note:\n",
        "        Uses the global vector_store_safety_procedures for the search.\n",
        "    \"\"\"\n",
        "    # The docstring above doubles as the tool description given to the model,\n",
        "    # so the score direction must be stated correctly.\n",
        "    return vector_store_safety_procedures.similarity_search_with_score(\n",
        "        query=query, k=k\n",
        "    )\n",
        "\n",
        "\n",
        "@tool\n",
        "def safety_procedures_full_text_search_tool(query: str, k: int = 5):\n",
        "    \"\"\"\n",
        "    Perform a full-text search on safety procedures.\n",
        "\n",
        "    Args:\n",
        "        query (str): The search query string.\n",
        "        k (int, optional): Number of top results to return. Defaults to 5.\n",
        "\n",
        "    Returns:\n",
        "        list: Relevant safety procedure documents matching the query.\n",
        "    \"\"\"\n",
        "\n",
        "    full_text_search = MongoDBAtlasFullTextSearchRetriever(\n",
        "        collection=safety_procedure_collection,\n",
        "        search_index_name=\"text_search_index\",\n",
        "        search_field=\"description\",\n",
        "        top_k=k,\n",
        "    )\n",
        "\n",
        "    # Return the hits: without this the tool handed None back to the agent.\n",
        "    return full_text_search.get_relevant_documents(query)\n",
        "\n",
        "\n",
        "@tool\n",
        "def safety_procedures_hybrid_search_tool(query: str, k: int = 5):\n",
        "    \"\"\"\n",
        "    Perform a hybrid (vector + full-text) search on safety procedures.\n",
        "\n",
        "    Args:\n",
        "        query (str): The search query string.\n",
        "        k (int, optional): Number of top results to return. Defaults to 5.\n",
        "\n",
        "    Returns:\n",
        "        list: Relevant safety procedure documents from hybrid search.\n",
        "\n",
        "    Note:\n",
        "        Uses both vector_store_safety_procedures and text_search_index.\n",
        "    \"\"\"\n",
        "\n",
        "    # top_k follows the k argument (default 5, the previously hard-coded value)\n",
        "    # so this tool matches the signature of the other search tools.\n",
        "    hybrid_search = MongoDBAtlasHybridSearchRetriever(\n",
        "        vectorstore=vector_store_safety_procedures,\n",
        "        search_index_name=\"text_search_index\",\n",
        "        top_k=k,\n",
        "    )\n",
        "\n",
        "    return hybrid_search.get_relevant_documents(query)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 74,
      "metadata": {
        "id": "E-Zv2wFlnAGS"
      },
      "outputs": [],
      "source": [
        "from typing import List\n",
        "\n",
        "from pydantic import BaseModel, Field\n",
        "\n",
        "\n",
        "class Step(BaseModel):\n",
        "    \"\"\"One numbered step of a safety procedure.\"\"\"\n",
        "\n",
        "    # Position of the step; ge=1 makes validation reject values below 1.\n",
        "    stepNumber: int = Field(..., ge=1)\n",
        "    # Text of the instruction for this step.\n",
        "    description: str\n",
        "\n",
        "\n",
        "class SafetyProcedure(BaseModel):\n",
        "    \"\"\"Schema used to validate a safety procedure document.\"\"\"\n",
        "\n",
        "    procedureId: str\n",
        "    title: str\n",
        "    description: str\n",
        "    category: str\n",
        "    # Each entry is validated against the Step model.\n",
        "    steps: List[Step]\n",
        "    # Defaults to the time of creation. datetime.now() is called without a tz,\n",
        "    # so the default is a naive local-time value. NOTE: datetime is not imported\n",
        "    # in this cell; it comes from the checkpointer cell, which must run first.\n",
        "    lastUpdated: datetime = Field(default_factory=datetime.now)\n",
        "\n",
        "\n",
        "def create_safety_procedure_document(procedure_data: dict) -> dict:\n",
        "    \"\"\"\n",
        "    Create a new safety procedure document from a dictionary, using Pydantic for validation.\n",
        "\n",
        "    Args:\n",
        "        procedure_data (dict): Dictionary representing the new safety procedure.\n",
        "\n",
        "    Returns:\n",
        "        dict: Validated safety procedure document whose steps are renumbered\n",
        "              1..n in their given order.\n",
        "\n",
        "    Raises:\n",
        "        ValueError: If the input data doesn't match the SafetyProcedure schema\n",
        "                    (the underlying error is attached as the cause).\n",
        "    \"\"\"\n",
        "    try:\n",
        "        # Instantiating the model is what performs the validation.\n",
        "        safety_procedure = SafetyProcedure(**procedure_data)\n",
        "\n",
        "        # Pydantic v2 renamed .dict() to .model_dump(); use whichever exists so\n",
        "        # the notebook works on both major versions without deprecation warnings.\n",
        "        to_dict = getattr(safety_procedure, \"model_dump\", None) or safety_procedure.dict\n",
        "        document = to_dict()\n",
        "\n",
        "        # Overwrite caller-supplied step numbers with their sequential position.\n",
        "        for position, step in enumerate(document[\"steps\"], start=1):\n",
        "            step[\"stepNumber\"] = position\n",
        "\n",
        "        return document\n",
        "    except Exception as e:\n",
        "        # Chain explicitly so the original validation error stays visible.\n",
        "        raise ValueError(f\"Invalid safety procedure data: {e!s}\") from e\n",
        "\n",
        "\n",
        "# Agent-facing tool for adding new safety procedures\n",
        "@tool\n",
        "def create_new_safety_procedures(new_procedure: dict):\n",
        "    \"\"\"\n",
        "    Create and validate a new safety procedure document.\n",
        "\n",
        "    Args:\n",
        "        new_procedure (dict): Dictionary containing the new safety procedure data.\n",
        "\n",
        "    Returns:\n",
        "        dict: Validated and formatted safety procedure document.\n",
        "\n",
        "    Raises:\n",
        "        ValueError: If the input data is invalid or doesn't match the required schema.\n",
        "\n",
        "    Note:\n",
        "        Uses Pydantic for data validation via create_safety_procedure_document function.\n",
        "    \"\"\"\n",
        "    # Thin wrapper: all validation and formatting happens in the helper.\n",
        "    return create_safety_procedure_document(new_procedure)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 75,
      "metadata": {
        "id": "DoJYaY2Oxk17"
      },
      "outputs": [],
      "source": [
        "# Vector store over the accident reports collection; it embeds and searches\n",
        "# the \"combined_info\" text field of each document.\n",
        "vector_store_accident_reports = MongoDBAtlasVectorSearch.from_connection_string(\n",
        "    connection_string=MONGO_URI,\n",
        "    namespace=f\"{DB_NAME}.{ACCIDENTS_REPORT_COLLECTION}\",\n",
        "    embedding=embedding_model,\n",
        "    index_name=ATLAS_VECTOR_SEARCH_INDEX,\n",
        "    text_key=\"combined_info\",\n",
        ")\n",
        "\n",
        "\n",
        "@tool\n",
        "def accident_reports_vector_search_tool(query: str, k: int = 5):\n",
        "    \"\"\"\n",
        "    Perform a vector similarity search on accident reports.\n",
        "\n",
        "    Args:\n",
        "        query (str): The search query string.\n",
        "        k (int, optional): Number of top results to return. Defaults to 5.\n",
        "\n",
        "    Returns:\n",
        "        list: List of tuples (Document, score), where Document is an accident report\n",
        "              and score is the Atlas Vector Search relevance score\n",
        "              (higher is more similar).\n",
        "\n",
        "    Note:\n",
        "        Uses the global vector_store_accident_reports for the search.\n",
        "    \"\"\"\n",
        "    # The docstring above doubles as the tool description given to the model,\n",
        "    # so the score direction must be stated correctly.\n",
        "    return vector_store_accident_reports.similarity_search_with_score(\n",
        "        query=query, k=k\n",
        "    )\n",
        "\n",
        "\n",
        "@tool\n",
        "def accident_reports_full_text_search_tool(query: str, k: int = 5):\n",
        "    \"\"\"\n",
        "    Perform a full-text search on accident reports.\n",
        "\n",
        "    Args:\n",
        "        query (str): The search query string.\n",
        "        k (int, optional): Number of top results to return. Defaults to 5.\n",
        "\n",
        "    Returns:\n",
        "        list: Relevant accident report documents matching the query.\n",
        "    \"\"\"\n",
        "    # Build a retriever over the accident reports collection, capped at k hits.\n",
        "    retriever = MongoDBAtlasFullTextSearchRetriever(\n",
        "        collection=accident_report_collection,\n",
        "        search_index_name=\"text_search_index\",\n",
        "        search_field=\"description\",\n",
        "        top_k=k,\n",
        "    )\n",
        "\n",
        "    matching_reports = retriever.get_relevant_documents(query)\n",
        "    return matching_reports\n",
        "\n",
        "\n",
        "@tool\n",
        "def accident_reports_hybrid_search_tool(query: str, k: int = 5):\n",
        "    \"\"\"\n",
        "    Perform a hybrid (vector + full-text) search on accident reports.\n",
        "\n",
        "    Args:\n",
        "        query (str): The search query string.\n",
        "        k (int, optional): Number of top results to return. Defaults to 5.\n",
        "\n",
        "    Returns:\n",
        "        list: Relevant accident report documents from hybrid search.\n",
        "\n",
        "    Note:\n",
        "        Uses both vector_store_accident_reports and text_search_index.\n",
        "    \"\"\"\n",
        "    # top_k follows the k argument (default 5, the previously hard-coded value)\n",
        "    # so this tool matches the signature of the other search tools.\n",
        "    hybrid_search = MongoDBAtlasHybridSearchRetriever(\n",
        "        vectorstore=vector_store_accident_reports,\n",
        "        search_index_name=\"text_search_index\",\n",
        "        top_k=k,\n",
        "    )\n",
        "\n",
        "    return hybrid_search.get_relevant_documents(query)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 76,
      "metadata": {
        "id": "TczlKq9VyKvA"
      },
      "outputs": [],
      "source": [
        "@tool\n",
        "def create_new_accident_report(new_report: dict):\n",
        "    \"\"\"\n",
        "    Create and validate a new accident report document.\n",
        "\n",
        "    Args:\n",
        "        new_report (dict): Dictionary containing the new accident report data.\n",
        "\n",
        "    Returns:\n",
        "        dict: Validated and formatted accident report document.\n",
        "\n",
        "    Raises:\n",
        "        ValueError: If the input data is invalid or doesn't match the required schema.\n",
        "\n",
        "    Note:\n",
        "        This function should implement proper validation and formatting for accident reports.\n",
        "    \"\"\"\n",
        "    # This is a placeholder. You'll need to implement the actual creation logic\n",
        "    # similar to how you've done it for safety procedures.\n",
        "    return new_report  # This should be replaced with actual implementation"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 77,
      "metadata": {
        "id": "GjdNOxnCrZEv"
      },
      "outputs": [],
      "source": [
        "safety_procedure_collection_tools = [\n",
        "    safety_procedures_vector_search_tool,\n",
        "    safety_procedures_full_text_search_tool,\n",
        "    safety_procedures_hybrid_search_tool,\n",
        "    create_new_safety_procedures,\n",
        "]\n",
        "\n",
        "accident_report_collection_tools = [\n",
        "    accident_reports_vector_search_tool,\n",
        "    accident_reports_full_text_search_tool,\n",
        "    accident_reports_hybrid_search_tool,\n",
        "    create_new_accident_report,\n",
        "]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "5cVYxfbSq7Ek"
      },
      "source": [
        "## LLM Defintion"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 78,
      "metadata": {
        "id": "Y6pF1DSoq9B5"
      },
      "outputs": [],
      "source": [
        "from langchain_anthropic import ChatAnthropic\n",
        "\n",
        "# llm = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n",
        "llm = ChatAnthropic(model=\"claude-3-sonnet-20240229\", temperature=0)\n",
        "\n",
        "# llm = ChatGroq(\n",
        "#     model=\"llama3-groq-70b-8192-tool-use-preview\", #\n",
        "#     temperature=0,\n",
        "#     max_tokens=None,\n",
        "#     timeout=None,\n",
        "#     # other params...\n",
        "# )"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "zdujfkT0rCBy"
      },
      "source": [
        "## Agent Definition"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 79,
      "metadata": {
        "id": "HqPfIuRKrERS"
      },
      "outputs": [],
      "source": [
        "from datetime import datetime\n",
        "\n",
        "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
        "\n",
        "\n",
        "def create_agent(llm, tools, system_message: str):\n",
        "    \"\"\"Create an agent.\"\"\"\n",
        "\n",
        "    prompt = ChatPromptTemplate.from_messages(\n",
        "        [\n",
        "            (\n",
        "                \"system\",\n",
        "                \"You are a helpful AI assistant, collaborating with other assistants.\"\n",
        "                \" Use the provided tools to progress towards answering the question.\"\n",
        "                \" If you are unable to fully answer, that's OK, another assistant with different tools \"\n",
        "                \" will help where you left off. Execute what you can to make progress.\"\n",
        "                \" If you or any of the other assistants have the final answer or deliverable,\"\n",
        "                \" prefix your response with FINAL ANSWER so the team knows to stop.\"\n",
        "                \" You have access to the following tools: {tool_names}.\\n{system_message}\"\n",
        "                \"\\nCurrent time: {time}.\",\n",
        "            ),\n",
        "            MessagesPlaceholder(variable_name=\"messages\"),\n",
        "        ]\n",
        "    )\n",
        "    prompt = prompt.partial(system_message=system_message)\n",
        "    prompt = prompt.partial(time=lambda: str(datetime.now()))\n",
        "    prompt = prompt.partial(tool_names=\", \".join([tool.name for tool in tools]))\n",
        "\n",
        "    return prompt | llm.bind_tools(tools)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 80,
      "metadata": {
        "id": "KHMlWAH4rH5x"
      },
      "outputs": [],
      "source": [
        "# Chatbot agent and node\n",
        "toolbox = []\n",
        "\n",
        "# Add tools\n",
        "toolbox.extend(safety_procedure_collection_tools)\n",
        "toolbox.extend(accident_report_collection_tools)\n",
        "\n",
        "# Create Agent\n",
        "chatbot_agent = create_agent(\n",
        "    llm,\n",
        "    toolbox,\n",
        "    system_message=\"\"\"\n",
        "      You are an advanced Factory Safety Assistant Agent specializing in managing and providing information about safety procedures and accident reports in industrial settings. Your key responsibilities include:\n",
        "\n",
        "      1. Searching and retrieving safety procedures and accident reports:\n",
        "        - Use the provided search tools to find relevant safety procedures and accident reports based on user queries\n",
        "        - Interpret and explain safety procedures and accident reports in detail\n",
        "        - Provide context and additional information related to specific safety protocols and past incidents\n",
        "\n",
        "      2. Creating new safety procedures and accident reports:\n",
        "        - When provided with appropriate information, use the create_new_safety_procedures tool to generate new safety procedure documents\n",
        "        - Use the create_new_accident_report tool to document new accidents or incidents\n",
        "        - Ensure all necessary details are included in new procedures and reports\n",
        "\n",
        "      3. Answering safety-related queries:\n",
        "        - Respond to questions about safety protocols, best practices, regulations, and past incidents\n",
        "        - Offer explanations and clarifications on complex safety issues\n",
        "        - Provide step-by-step guidance on implementing safety procedures and handling incidents\n",
        "\n",
        "      4. Assisting with safety compliance and incident prevention:\n",
        "        - Help identify relevant safety procedures for specific tasks or situations\n",
        "        - Advise on how to adhere to safety guidelines and regulations\n",
        "        - Suggest improvements or updates to existing safety procedures based on past incidents\n",
        "        - Analyze accident reports to identify trends and recommend preventive measures\n",
        "\n",
        "      5. Supporting safety training and awareness:\n",
        "        - Explain the importance and rationale behind safety procedures\n",
        "        - Offer tips and best practices for maintaining a safe work environment\n",
        "        - Help users understand the potential risks and consequences of not following safety procedures\n",
        "        - Use past incident reports to illustrate the importance of safety measures\n",
        "\n",
        "        6. Providing Structured Safety Advice:\n",
        "   When users ask for safety procedures advice, provide information in the following structured format:\n",
        "\n",
        "   Safety Procedure Advice:\n",
        "   a. Relevant Procedure:\n",
        "      - Title: [Procedure Title]\n",
        "      - ID: [Procedure ID]\n",
        "      - Description: [Brief description of the procedure]\n",
        "      - Key Steps:\n",
        "        1. [Step 1]\n",
        "        2. [Step 2]\n",
        "        3. [...]\n",
        "\n",
        "   b. Related Incidents (Past 2 Years):\n",
        "      - Incident 1:\n",
        "        - IncidentID: [ID of the Incident document]\n",
        "        - Date: [Date of incident]\n",
        "        - Description: [Brief description of the incident]\n",
        "        - Root Cause(s): [Identified root cause(s)]\n",
        "      - Incident 2:\n",
        "        - [Same structure as Incident 1]\n",
        "      - [Additional incidents if applicable]\n",
        "\n",
        "   c. Possible Root Causes:\n",
        "      - [List of potential root causes based on the procedure and related incidents]\n",
        "\n",
        "   d. Additional Safety Recommendations:\n",
        "      - [Any extra safety tips or precautions based on the procedure and incident history]\n",
        "\n",
        "   e. References:\n",
        "      - Safety Procedure: [Reference to the specific safety procedure document]\n",
        "      - Incident Reports: [References to the relevant incident reports]\n",
        "\n",
        "When providing this structured advice:\n",
        "- Use the safety procedure search tools to find the most relevant procedure.\n",
        "- Utilize the accident report search tools to identify related incidents from the past two years in the same region.\n",
        "- Analyze the incident reports to identify common or significant root causes.\n",
        "- Provide additional recommendations based on your analysis of both the procedure and the incident history.\n",
        "- Always include clear references to the source documents for both procedures and incident reports.\n",
        "\n",
        "\n",
        "      When creating a new safety procedure, ensure you have all required information and use the create_new_safety_procedures tool. The required fields are:\n",
        "      - procedureId\n",
        "      - title\n",
        "      - description\n",
        "      - category\n",
        "      - steps (a list of step objects, each with a stepNumber and description)\n",
        "\n",
        "      When creating a new accident report, use the create_new_accident_report tool. Ensure you gather all necessary information about the incident.\n",
        "\n",
        "      Provide detailed, accurate, and helpful information to support factory workers, managers, and safety officers in maintaining a safe work environment and properly documenting incidents. If you cannot find specific information or if the information requested is not available, clearly state this and offer to assist in creating a new procedure or report if appropriate.\n",
        "\n",
        "      When discussing safety matters, always prioritize the well-being of workers and adherence to safety regulations. Use information from accident reports to reinforce the importance of following safety procedures and to suggest improvements in safety protocols.\n",
        "\n",
        "      DO NOT MAKE UP ANY INFORMATION.\n",
        "    \"\"\",\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "gbuA68uMsHtV"
      },
      "source": [
        "## State Definition\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 81,
      "metadata": {
        "id": "QOwbsd1csGpr"
      },
      "outputs": [],
      "source": [
        "import operator\n",
        "from typing import Annotated, TypedDict\n",
        "\n",
        "from langchain_core.messages import BaseMessage\n",
        "\n",
        "\n",
        "class AgentState(TypedDict):\n",
        "    messages: Annotated[List[BaseMessage], operator.add]\n",
        "    sender: str"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "E1VZ2I2nsKzj"
      },
      "source": [
        "## Node Definition"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 82,
      "metadata": {
        "id": "T_eRgggEsL5v"
      },
      "outputs": [],
      "source": [
        "import functools\n",
        "\n",
        "from langchain_core.messages import AIMessage, ToolMessage\n",
        "\n",
        "\n",
        "def agent_node(state, agent, name):\n",
        "    result = agent.invoke(state)\n",
        "    if isinstance(result, ToolMessage):\n",
        "        pass\n",
        "    else:\n",
        "        result = AIMessage(**result.dict(exclude={\"type\", \"name\"}), name=name)\n",
        "    return {\n",
        "        \"messages\": [result],\n",
        "        # track the sender so we know who to pass to next.\n",
        "        \"sender\": name,\n",
        "    }"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 83,
      "metadata": {
        "id": "bNNZHSgvsPZN"
      },
      "outputs": [],
      "source": [
        "from langgraph.prebuilt import ToolNode\n",
        "\n",
        "chatbot_node = functools.partial(\n",
        "    agent_node, agent=chatbot_agent, name=\"Factory Safety Assistant Agent( FSAA)\"\n",
        ")\n",
        "tool_node = ToolNode(toolbox, name=\"tools\")"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "rmzk1RESsbMw"
      },
      "source": [
        "## Agentic Workflow Definition\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 84,
      "metadata": {
        "id": "ybxapMBzsZl5"
      },
      "outputs": [],
      "source": [
        "from langgraph.graph import END, StateGraph\n",
        "from langgraph.prebuilt import tools_condition\n",
        "\n",
        "workflow = StateGraph(AgentState)\n",
        "\n",
        "workflow.add_node(\"chatbot\", chatbot_node)\n",
        "workflow.add_node(\"tools\", tool_node)\n",
        "\n",
        "workflow.set_entry_point(\"chatbot\")\n",
        "workflow.add_conditional_edges(\"chatbot\", tools_condition, {\"tools\": \"tools\", END: END})\n",
        "\n",
        "workflow.add_edge(\"tools\", \"chatbot\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 85,
      "metadata": {
        "id": "Kh9c2Htesfzc"
      },
      "outputs": [],
      "source": [
        "from motor.motor_asyncio import AsyncIOMotorClient\n",
        "\n",
        "mongo_client = AsyncIOMotorClient(MONGO_URI)\n",
        "mongodb_checkpointer = MongoDBSaver(mongo_client, DB_NAME, \"state_store\")\n",
        "\n",
        "graph = workflow.compile(checkpointer=mongodb_checkpointer)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 86,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 236
        },
        "id": "zlLNEWF6siGF",
        "outputId": "4d03ebc6-9583-4f38-aac7-4a35bda80728"
      },
      "outputs": [
        {
          "data": {
            "image/jpeg": "/9j/4AAQSkZJRgABAQAAAQABAAD/4gHYSUNDX1BST0ZJTEUAAQEAAAHIAAAAAAQwAABtbnRyUkdCIFhZWiAH4AABAAEAAAAAAABhY3NwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAA9tYAAQAAAADTLQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAlkZXNjAAAA8AAAACRyWFlaAAABFAAAABRnWFlaAAABKAAAABRiWFlaAAABPAAAABR3dHB0AAABUAAAABRyVFJDAAABZAAAAChnVFJDAAABZAAAAChiVFJDAAABZAAAAChjcHJ0AAABjAAAADxtbHVjAAAAAAAAAAEAAAAMZW5VUwAAAAgAAAAcAHMAUgBHAEJYWVogAAAAAAAAb6IAADj1AAADkFhZWiAAAAAAAABimQAAt4UAABjaWFlaIAAAAAAAACSgAAAPhAAAts9YWVogAAAAAAAA9tYAAQAAAADTLXBhcmEAAAAAAAQAAAACZmYAAPKnAAANWQAAE9AAAApbAAAAAAAAAABtbHVjAAAAAAAAAAEAAAAMZW5VUwAAACAAAAAcAEcAbwBvAGcAbABlACAASQBuAGMALgAgADIAMAAxADb/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/2wBDAQMEBAUEBQkFBQkUDQsNFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBT/wAARCADbAMcDASIAAhEBAxEB/8QAHQABAAEFAQEBAAAAAAAAAAAAAAYDBAUHCAEJAv/EAFcQAAEDBAADAgcICg0KBwAAAAECAwQABQYRBxIhEzEIFBYiQVGUFyMyVVZh0dMVQnF0dYGRk5XSCSQ0NjhSU1SSsbK01Bg1N2JjcoKhs8EzRVeDhMPx/8QAGgEBAQADAQEAAAAAAAAAAAAAAAECAwQFB//EADMRAQABAwAFCgQHAQAAAAAAAAABAgMRBBIhMVETFEFSYXGRobHBFSMz0QUiQlOB4fAy/9oADAMBAAIRAxEAPwD6p0pSgUpSgUq0ulzj2a3vzZSilhlPMeVJUpR7glKR1UonQCR1JIA6msH5PS8m9/vzjrMVWy3Z47pQhCfR2yknbi/WAeQb0ArXOrbTRExrVTiP9uXDMyb7bYThRIuEVhY6FLr6UkfiJqj5VWX44ge0o+mqUfC8fiNhDFitrSAANIiNju6D0VV8lbL8TwPZkfRWfye3yNh5VWX44ge0o+mnlVZfjiB7Sj6aeStl+J4HsyPop5K2X4ngezI+inye3yXYeVVl+OIHtKPpp5VWX44ge0o+mnkrZfieB7Mj6KeStl+J4HsyPop8nt8jYeVVl+OIHtKPpr1GTWdxQSi7QVKPoTJQT/XXnkrZfieB7Mj6K8XidjcQUqs1vUk9CDFQQf8AlT5Pb5GxlEqC0hSSFJI2CDsEV7UYXgUGCtT9gUrHZZPN+0hqOs/7Rj4CgfSQArv0oE7rI2O8uT1vw5jHilzi6DzIO0LB7nGz6UK0dHvBBB6isaqIxrUTmPCUxwZalKVpQpSlApSlApSlApSlApSlApSlBF7tq7ZxabcvSo0FhdycQftneYNs/dA26rr6Qg942JRUYdHifElhxewifa1NIVrpzMu82t+sh4kf7p9VSeui7uoiN2PvnzWSlKVzogELjxg9yyi5Y7DvDky7W5T6JDUaBJcQHGUlTraXUtlC3EgHaEqKtjWt9KjPCnwnsb4h8M5mYXBqXYGIBWqah+BK7NpHbuNNcjimUh5RCBsN8xSVaIB6VEcOF4xzwgDBwuyZbbMVuVzuEjJoN8txRam3OVSkzIUhXpddCT2aFKBCySlBFRzF7
nnWHeDvcMIs+O5PasssU91MuZGtaldpCcualOuwHFAtvu+LuFSUjZ2D02BQbytXhBYDecQyDJ4t+3aMfSV3VTsOQ0/DTy821sLbDo2Oo8zro63qopnfhY4pjFpsdxtbc++Q7je41qVJZtc3sg24dreaUGCH9J6pDZPOT5pOtVo27YbeJdl4+ps2N53Jh5DiERFreyNiVIlz3mTIS4kdpzOJVt1PK0oJVrZSnl61vbj9Ybinh7g8202WZdE41kNpusm3W1guSfFmFgOBpodVqSDvlHXoaDb9nu0e+2mHconbeKy2UvteMMLYc5VDY5m3AlaDo9UqAI7iBV5WNxy+N5LZIlzaiTYDclPOmPcoy40hA2RpbawFJPTeiPSKyVAqMZdq13Ow3lGkrbmIgPHr57MhQbCfzpZV+I+upPUYzxPjcWz29IJdl3WIUgDfRl0SFE+ocrKuvziuix9SInd093T5LG9J6UpXOhSlKBSlKBSlKBSlKBSlKBSlKDFZFZlXiI0WHEsXCI6JMN9YJDboBHUAglKkqUhQB6pWoAjvqna75Gvgft8poRrihJTJtzx2eXuKk7A52zvosDR7jogpGZrHXnHrdkLTbdwiNyeyJU04dpcaURoqQsaUg66bSQa3U1UzGrXu9P8Af7tvehA8GzhOkgjhviwI7iLQx+rXn+TXwn/9NsV/RDH6tSE4MW+kfIr7HR0AR44HdD7riVKP4zunkTI+VV+/PM/VVlqW+v5SYjikkeO1EjtMMtpaZaSEIbQNJSkDQAHoAFVKi/kTI+VV+/PM/VU8iZHyqv355n6qnJ2+v5SYjilFK598Fq9ZDxj4L2nKr9lF1Rc5UmW04Iamm2+VqS42nQLZPwUDfXvrbXkTI+VV+/PM/VU5O31/KTEcVhkXA7h5l15kXa94RYLvdJPL20ybbmnXXOVISnmUpJJ0lIH3AKx6vBv4UrSgK4cYuoIHKkG0sHlGydDzfWSfx1n/ACJkfKq/fnmfqqDCXiCFZPflpPTXbtD/AJhsGnJ2+v5SYjirWy04vwtx0RbdCt2NWZtZUmPEaSw12ij3JQkDalH0AbJ7tmvbPCkXW7C+z2DGKWlMwYq/htNqIKlrHoWrlT0+1AA7yqqlrwu1WqaJoadmXAAgTJz65Dqd94SpZPID6k6HzVnak1U0RMW+np+xsjcUpStCFKUoFKUoFKUoFKUoFKUoFKUoFKUoFKUoFKUoOd/AD/gw499+3H++vV0RXO/gB/wYce+/bj/fXq6IoFKUoFKUoFKUoFKUoFKUoFKUoFKUoFKUoFKUoFKUoFKUoFKUoOd/AD/gw499+3H++vV0RXO/gB/wYce+/bj/AH16uiKBSlKBSlKBSlKBSlKBSlKBSlKBSlKBSlKBSlKBSlWN6vEew25yZJ51IQUpS22nmW4tRCUoSPSSSAPu+irETVMRG8X1KhJyHLnTzt2qzsIPUNuznVLA+chrW/ubHzmvPs7mH8wsftb31ddfNa+MeMLhN64b/ZP+Baspwm2cSbZHLlxsAEO48g2VQlrJQr/23FHu9Dqieia6t+zuYfzCx+1vfV1j8hTkWVWG42W6Wewy7bcI7kSTHXLe040tJSpJ979IJpzWvjHjBh8x/wBjy4KOcU+O8K9yW1CyYkpu6vuDYCpIVuM3sdx508/qIaUPTX1/rnXwdODF08HDBXccszFpuCpEtyZJnyJDiXHlK0EggN6ASgJTodN7PTmNbT+zuYfzCx+1vfV05rXxjxgwm9KhH2dzD+YWP2t76uvRfcw2NwLJr06lvfV05rXxjxgwm1KwWO5Iu6vPQp0VMC6sIS44whztG1oUSAttek8w2CDsAgjqNFJOdrmroqtzq1b0KUpWAUpSgUpSgUpSgUpSgUpSgUpSgVEOJJ/aVkHoN3jbH/ETUvqIcSf3HY/wvG/rNdWjfWpZU715Vpd7xAx+2yLjdJsa22+MguPy5bqWmmk+lSlqICR85NQ7jnxBl8LeFV+yWBHal
XCKhpqK3IJDXbOvIZbK9deUKcSTr0A9RWq+O+KZfj3g58SXclzlzLEu2RY7BdrjxUMu7Gy2WgDy+gJXzH/WrfM4YukAQQCDsGlaIiZdlPDjiMux5ZmbF2ss7GJl78fetrUYWtyMtoOFIR8Jrld5tLKlDk+Ed1GuFvEvN7txEgY5cb5fZlmyaxTJltvF3scO3vNOtFrlejtoKttlLwPK+jewn4QJFTWHR1zvttstuXcLhcIsCAhSUKlSX0ttJUpYQkFSiACVkJA9JIHfV9XE1osl1h+AMw+7kUqeiaq1mJHkx2A3AIurQPJyISpYJIJ7RSj06EdannETi9nHg93LIIN4vTWdNPY6/eLVIkwWorsaS080yWnQyEpUyS+hXNoK80jfpqa/TI6dr8tuIdTzIUladkbSdjYOjWjY9wzzCOI2M4jf82VkTeX22eluai2x471rmMNoX2jISjlW2QtWkupUQUp2SCQfz4FtnuFv4CY1KmX6XdY0uKFR4chlhCIQDjgUlCm0JUoKJBPOVHp011qxVmcDcEEkcTYQHcbPJJ+fTzGv6zU5qCwv9J0H8Dyv+vHqdVhpW+nu95WegpSlcSFKUoFKUoFKUoFKUoFKUoFKUoFRDiT+47H+F439ZqX1g8wsj18tLaYpR43GkNS2UuHSVqQoHlJ0dcw2nejre+uq6NHqim7TMrG9gcuxO1Z1jNyx+9xEzrVcGVMSGFEjmSfUR1BB0QR1BAIrXrvg6wrhiN9xy7ZpmF9tt2gG3KTcri26qO1sHbfvQBX0A51hatempheOI1uxq2yp97h3WzRIjZdlPyre6WmEgecVOoSpvQ9YUR89UrFxSseUWmNdbMi6XW2SU87EyFapLzLo2RtK0tkEbBHQ+iu/ka5/SurKhlnCKw5tfkXO7eMyNWaZYnIgcCWXY0kt9rzdObm97ABChrZ6d2sFi3g/WzGMosGQLybJrzcrIw7DiKuk1txAjLQElkoS2kco0hXMAFkoTzKUBqpn5Zxviy/foSX9VTyzjD/yy/foSX9VTkK+rJqzwQBrwZcfawy74iL7kZxie8081azNR2cDkkpkhMdXZ86EladdVKIB0CDoi+tng8442b87fp94zOXebeq0yJeQSkuuIhqOyw12aEJQkq0okDmJAO9gVIMf4r4/llrbudjNxvNtdKkomW+2SX2VlKilQC0NkEhQIPXoQRWR8s43xZfv0JL+qpyFfVNWeCL4TwNteHZIxfpF8v8AlF0iRFQID9/mJf8AEWFEFaWglCBtXKkFauZZCQCqshwz4TwOFLE2HaLtd5FodWVxrVOkIdjwAVqWUMaQFBJKz0UpXcNVmPLON8WX79CS/qqw2TcZsWwtiM9kEmZYmZLwjsO3K3SI6XXD3ISVoAKj6qchXH6ZNWeCQwv9J0H8Dyv+vHqdVEMYt8q4X5d9kxXIDKYpiRmJAAdUFKSpa1J+12UpAB69CTrdS+uPSZiaoiOiEkpSlciFKUoFKUoFKUoFKUoFKUoFKV+VrS2hSlKCUpGyonQAoP1UK4scUofCTHI91lWi8X1yVMagRoFkhqkyHXnN8o0OgHQ9SR6ANkgG3unEK9HPcStNgxV6/wCM3eM5MmZSxLbESI0E+9hPUlxS1FBGteadp5tK5a/CrhXC4T2i5Qot3vF8duM924yZt6mKkvLcXoaBPQAJSkdB11s7NBQgYDe3+IuSXy85U/eMVucFEGJiT0RsRY6dDtVObG3FKPMOuvNWQebSdTlhhuMy2yy2lpptIQhtCQlKUgaAAHcBVSlArnHw8OOfuLcDZ7UCR2OR5FzWy38p0ttKh788PSOVB0CO5S0Gujq0N4QfgcYh4SeSW285PfMkhuW+J4oxEtcphthI51LUvlcZWedXMASCNhCenSg5N/Yu+Ov2HyO6cL7pICYl05rjai4r4MlKR2rQ/wB9tIUB3DslelVfSuvn14E/gYYZk+IYbxUkXjIo+RQbs7JbjxpTCYqjGlrShKklk
rKVBsBQ5xvata3X0FoFUZUNic12UlhuQ3zJXyOoChzJIUk6PpBAIPoIFVqUGu5WI3/EMmzLMrXe7zk4nW/mi4ZJfaTFTLbQAnsHFAdkFhKUkb1tSlHmOtZnCM7GT43YZt4tj+I3m6trUmw3dxCZaVoJ50hIPnga5tjrylJITvQldRXMuFuK8QLrj9zyCyx7lcbBLTNtkpzYcjOgpO0kEbBKUkpOweUbHQUEqpWqnctyrhUxxByPiHPt9wwqC6mZaHbPCdM1mOokKaebGwrk8zShve1KJA6J2HjeRW7Lsft17tEjxu13GOiVFkBCkdo0tIUlWlAEbBB6gUGSpSlApSlApSlApSlApSlBiMqy6y4PZHbxkFzjWe1tLbbcmS3AhtClrShG1HoNqUkb+eoZcMfyTiTc85xfM7JbY/DqXGRCgLhznfHpnMnbq1lPKG09QkJ6EFB+Ekg1I+J1jtGRYBfYV+sqcjtXiyn3rUob8a7L31KB1HUqQnXz6rzhhmTfEHh7YMjatsizt3GIh8QJaSHY+xooVsDuI1vXXvoMrjOM2vDcft9jskJq3WmAyliNFZGktoHcB6T909SeprJ0pQKUpQKoy5bECK9KlPNxozCFOOvPLCUNoA2VKJ6AAAkk1j8qyuz4Pj06+364sWq0QWy7IlyVcqEJ/wC5J0AB1JIABJrmJiDk/huz25Vybn4jwJZcC2IBJZnZOQdhbmurcbYBAHVXeNnRQEp8ABQX4L2OLSQpKplxKVA7BHjr3UV0VVlZbJb8btMS12qExbrbEbSzHixmwhtpAGglKR0Aq9oFKUoFKUoPCNjR6ioZfOGpuufYzk8TI7xaE2ZpyO5aIcjUGaypJ0l1ojW0q5SFDrpOvURNKUEJ4d5tfsjRdmsqxVzDpsW4uxIqH5jT7c5kec260pJ2dpKdgjodjrogTatU8X4uEv5/wtXlEybGvTV4cVYG4oJbek9keZLuknSeX1kdfTW1qBSlKBSlKBSlKBSlflbiGxtagkf6x1Qak8Ibwl8e8Gq2We45LZb/AHKBc3XGESbNFbdbZcSEqCHVOOICVLBUUgbJDa/4tcVwv2TnPLlMXYcexy23a6Tr6tu2XC8pKdw3FlLDC47Kk6dG0bWHVDvGj0VX0H4kYJjnFfC7pi2RstTbVcGi24kqHM2r7VxBPwVpOiD6CK+aHCPwULzwx8OLFMYvTfjdlgyl3qHdkp96kx2EqcaX39FdoltKkk7ST6QQTcSPqzSqXjTP8s3/AEhTxpn+Wb/pCmJFWofxV4sYzwYw6XkuVXFMC3seahA852Q4R5rTSO9azru+6SQASMFxx4+49wMxpidcEu3a8XBzxa0WK3jnlXGR0AbbSN6G1J2rXTY6ElKTrnhVwEyLPcwicUuNamZ+TNefZMWbPNAsCCdjzeoW/wB21HeiAdkhJTBicV4XZT4UuQwc44twHLLg8RwSMf4fOKPvn8WTPH2yiD0bPdvRAHMF9TttoZbQ22hKG0AJSlI0AB3ACv1SgUpSgUpSgUpX4W6hvXOtKd93MdUH7q0uz8uLapr1vionT22VrjxXXuxS84EkpQV8quQE6HNo63vR7qreNM/yzf8ASFPGmf5Zv+kKuJHzoyD9lJQ5eIouXBeL4/apCykTrwFvRnRtKuQmKC2vvBPf6K698F3j3J8I7hs9lz+MLxVr7IOw2I65njQfQhCCXUr7NvpzKWjWj1bPX0Dhvw6PBbnveEbYpuJx0uRc/lBshA97jz9gPKWQPNSpJDpJ/wBqe5NfRnhrhVm4W4FYsTs6m0W+0xURmzsAuEdVOK19stRUo/Oo0xIlVKpeNM/yzf8ASFeiQ0ogB1BJ7gFCmJFSlKVApSlBa3Sb9jbZLl8vN2DK3eX18qSf+1a8teJWq/W6Jcrzb4l4uUplDz0mcwl5W1AEpTzDzUDuCRoaHr2anOVfvYvH3m9/YNR7Gv3uWr70a/sCvS0eZotzVTOJyy3Qsvc+xb5NWf2Br9Wnu
fYt8mrP7A1+rUF4V+EVYuJIykuNSbMixzJiFvTYclljxVhYT2y3nWkIQo75i0TzoG9joTUgwjjbhXEWe/CsN7EqW1H8bLL8Z6MpbG9ds32qE9o3sgc6Np6jr1FbYv3J/XPimZ4s17n2LfJqz+wNfq09z7Fvk1Z/YGv1awGJceMEzq/os1kyBubPdS4uOkx3mm5SW/hlh1aAh4J9JbUrp17qjWD+EPa18HsTy7Npce1zr4XG241uivvF1xK3BpplAccOko2e/XedU5xc68+JmeLYZ4fYz0Ldgt0dwdUvRoyGXEH1pWgBST84IIqRYJdJF0sBMp0yJEaTIhqeOtuBp1SEqOgBzFKQToAb3rpVhZLzDyOzwrrbnvGIE1lEhh7lKedtQ2lWlAEbBHeK/XDP/Mlw/C07+8LrC9VNyzM1TnEx7rnMbUupSleWxKUpQKtbpdItlt8idNeTHiMIK3HFdwA+YdSfUB1J6CrqtQcdby47Os1jQrTBSudITv4RSQlofONlavuoTXZoejzpV+m1x9FhHMq4i3nLH3EsyJFntWyG4sdfZvOJ9BccT5wJ/ipIA3o82t1DVWG2uLUtyBHdcVrmW60FqV90nqavqV9Hs2qNHp1LUYhjrSx/k9aviyH7Oj6KeT1q+LIfs6PorIVELzxcxLH7y5a594QxKaUlDx7FxTTCla5UuupSUNk7HRSh3itlV2KIzVVj+TM8Wf8AJ61fFkP2dH0U8nrV8WQ/Z0fRUdvnGHEccuc633C7FmXAUgS0IivOCOFIStKnFJQQlBStPnkhPeN7BAu8o4mY1hz8Nm63RLL8tBdZaZacfWpsd7nK2lRCP9Y6Hz1jy9EZ/Pu37TM8WX8nrV8WQ/Z0fRQ47aiCPsZD0en7nR9FYLhPl0vPOHdkv85thqVOZLjiIySlsHmUPNBJPcB3k1Layoua9MVROyTM8VeyXG4Yu4ldmnv28JI94SoqYUPUWj5v4wAfURW8eH2fM5nDW28hMW7RwPGIyTtJB6BxBPek6+6D0PoJ0PV3Y7w5jeS2m6tq5Q1IQy91+Ew4oIcB9ethWvWgV5Wn6DRpVuaoj88bp9pWJzsl03SlK+ejF5V+9i8feb39g1Hsa/e5avvRr+wKkmRsrkY9dGm0lTi4rqUpHpJQQKjWLrS5jVpUk7SqIyQfWOQV6Fn6M9/svQ5mumJ5FeOH3Grhq1j93Yvd3u90u1umLiLTb5jLrqXm0CT8AKWNtlJIIO96FZDLrfe/CBym0/YTGL5h8a1Y3eYUiZfYKoPK/MjJZajtA9XAhQ5ypIKByJ0STXTlKaqOYceRe83d4LY5Hwq+YzIwyQzKvE25QTHjR0sQ3I6mGHT5rwcUsaLZI5Rs6rDY/YFWngfh9rv2NZ1Z8rxW4zI0O645a1SJEN/az2yEjmD0d1DoSTyqSrqDrWx1vSmqIjwkuOTXbhrjszMoiYOTvREKnsJSE8rnzpBISojRKR3EkeipHwz/AMyXD8LTv7wururbhqgpsMxf2rl0nKSdd48ZcG/+R/8Aysq9lirvj3XoSylKV5qFKUoFaQ43RVR81tUpX/hyoC2UnX2zbnMR+R0fkPqrd9RniBhyc0sJioWlmcwsPxHl70hwAjStfaqBKT8x33gV6X4fpFOjaTTXXu3T/Kw5/pSXGcjyJFvnxlR5TW0PxXh1Ho/4kn0EdCKho4MYEDsYbYwfwe1+rX0KaqpiJoxMd/8AUsEyrnKJhbNuumUWHJ7Hmdy+yl3kvtO2eXL+x8uNIXsFwNuJbQQFELCwOifTW2vcXwH5GWL9Htfq1MWWUR2kNNIS22hISlCRoJA6ACtFdmb2NeIjH8+sDTj2LzWPdrjtW2UWJkFlmCCytXjITbUt6bJHvh5hy9N9enfVhiarnw8yxm53PHbzdI92x22RWX4EJT7kR1hCg4w4kdW+YrCtnQ2Ds9Om9KVObRmKonExmfGZn3EA4CW2ZaOEGMw58R+BMajqDkaS2W3Gz2ijpST1B
61P6jt+4dYtlE7x28Y7bLpL5A328uKhxfKO4bI3rqax3uLYD8jLF+j2v1a2UU126YopiJiNm/8AoTOqT8VVxciQW+rsuUzHQNb6qcSN/iGz+KsdYsZsmGwnmbRbYVmiLX2riIrSWUFWgOY6AG9ADfzVt3hLgj789jJLiypllpKvEI7iSFkqHKXlA93m7CR6lKPpFa9J0mNFszcr39HetO/Lb9KUr5mpUTlcPk9u4u2Xu5WNlaisxYYYWyFHqSlLrS+XZ66SQNknXWpZStlFyq3/AMyucIb5AXD5Z3v8xC/w9PIC4fLO9/mIX+HqZUrdzm52eEfYyhvkBcPlne/zEL/D08gLh8s73+Yhf4eplSnObnZ4R9jKII4fyF+bKyq9SmT8Jr9rM8w9I52mUrH3UqB9RFSmHDYt0RmLFZRHjMoDbbTSQlKEgaAAHcKrUrXXdrubKp9vQzkpSlaUKUpQKUpQYXJMNs2XNIRdYKJC2wQ28CUOt77+VxJCk/iPWoU9wDtalks329R0HuQFsLA+4VNE/lJrZ9K7LWmaRYjVt1zELlqz3AYPylvf5Iv1FPcBg/KW9/ki/UVtOlb/AInpf7np9jLVnuAwflLe/wAkX6inuAwflLe/yRfqK2nSnxPS/wBz0+xlqz3AYPylvf5Iv1FejgDA31yS9kf/ABR/9FbSpT4npf7noZQqwcIMcsMhuSph66S2yFIeuLna8pHcQjQQD84SDU1pSuK7euXqta5VMz2mSlKVpR//2Q==",
            "text/plain": [
              "<IPython.core.display.Image object>"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ],
      "source": [
        "from IPython.display import Image, display\n",
        "\n",
        "try:\n",
        "    display(Image(graph.get_graph(xray=True).draw_mermaid_png()))\n",
        "except Exception:\n",
        "    # This requires some extra dependencies and is optional\n",
        "    pass"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 87,
      "metadata": {
        "id": "Xa3E-9I8siph"
      },
      "outputs": [],
      "source": [
        "import re\n",
        "\n",
        "\n",
        "def sanitize_name(name: str) -> str:\n",
        "    \"\"\"Sanitize the name to match the pattern '^[a-zA-Z0-9_-]+$'.\"\"\"\n",
        "    return re.sub(r\"[^a-zA-Z0-9_-]\", \"_\", name)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 88,
      "metadata": {
        "id": "NVZl9B3fsmuA"
      },
      "outputs": [],
      "source": [
        "import asyncio\n",
        "\n",
        "from langchain_core.messages import HumanMessage\n",
        "\n",
        "\n",
        "async def chat_loop():\n",
        "    config = {\"configurable\": {\"thread_id\": \"0\"}}\n",
        "\n",
        "    while True:\n",
        "        user_input = await asyncio.get_event_loop().run_in_executor(\n",
        "            None, input, \"User: \"\n",
        "        )\n",
        "        if user_input.lower() in [\"quit\", \"exit\", \"q\"]:\n",
        "            print(\"Goodbye!\")\n",
        "            break\n",
        "\n",
        "        sanitized_name = (\n",
        "            sanitize_name(\"Human\") or \"Anonymous\"\n",
        "        )  # Fallback if sanitized name is empty\n",
        "        state = {\"messages\": [HumanMessage(content=user_input, name=sanitized_name)]}\n",
        "\n",
        "        print(\"Assistant: \", end=\"\", flush=True)\n",
        "\n",
        "        max_retries = 3\n",
        "        retry_delay = 1\n",
        "\n",
        "        for attempt in range(max_retries):\n",
        "            try:\n",
        "                async for chunk in graph.astream(state, config, stream_mode=\"values\"):\n",
        "                    if chunk.get(\"messages\"):\n",
        "                        last_message = chunk[\"messages\"][-1]\n",
        "                        if isinstance(last_message, AIMessage):\n",
        "                            last_message.name = (\n",
        "                                sanitize_name(last_message.name or \"AI\") or \"AI\"\n",
        "                            )\n",
        "                            print(last_message.content, end=\"\", flush=True)\n",
        "                    elif isinstance(last_message, ToolMessage):\n",
        "                        print(f\"\\n[Tool Used: {last_message.name}]\")\n",
        "                        print(f\"Tool Call ID: {last_message.tool_call_id}\")\n",
        "                        print(f\"Content: {last_message.content}\")\n",
        "                        print(\"Assistant: \", end=\"\", flush=True)\n",
        "                break\n",
        "            except Exception as e:\n",
        "                if attempt < max_retries - 1:\n",
        "                    print(f\"\\nAn unexpected error occurred: {e!s}\")\n",
        "                    print(f\"\\nRetrying in {retry_delay} seconds...\")\n",
        "                    await asyncio.sleep(retry_delay)\n",
        "                    retry_delay *= 2\n",
        "                else:\n",
        "                    print(f\"\\nMax retries reached. OpenAI API error: {e!s}\")\n",
        "                    break\n",
        "\n",
        "        print(\"\\n\")  # New line after the complete response"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "dk905LiNsoLT"
      },
      "outputs": [],
      "source": [
        "# For Jupyter notebooks and IPython environments\n",
        "import nest_asyncio\n",
        "\n",
        "nest_asyncio.apply()\n",
        "\n",
        "# Run the async function\n",
        "await chat_loop()"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "collapsed_sections": [
        "jbg6qsphi0RC",
        "N-XJmokEi9OQ",
        "5cVYxfbSq7Ek",
        "zdujfkT0rCBy",
        "gbuA68uMsHtV",
        "E1VZ2I2nsKzj",
        "rmzk1RESsbMw"
      ],
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "state": {}
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
